%%% -*-BibTeX-*- %%% ==================================================================== %%% BibTeX-file{ %%% author = "Nelson H. F. Beebe", %%% version = "1.88", %%% date = "18 January 2025", %%% time = "06:41:40 MST", %%% filename = "tcbb.bib", %%% address = "University of Utah %%% Department of Mathematics, 110 LCB %%% 155 S 1400 E RM 233 %%% Salt Lake City, UT 84112-0090 %%% USA", %%% telephone = "+1 801 581 5254", %%% URL = "https://www.math.utah.edu/~beebe", %%% checksum = "62561 97233 499674 4859832", %%% email = "beebe at math.utah.edu, beebe at acm.org, %%% beebe at computer.org (Internet)", %%% codetable = "ISO/ASCII", %%% keywords = "BibTeX; bibliography; IEEE/ACM Transactions %%% on Computational Biology and %%% Bioinformatics; TCBB", %%% license = "public domain", %%% supported = "yes", %%% docstring = "This is a COMPLETE BibTeX bibliography for %%% IEEE/ACM Transactions on Computational %%% Biology and Bioinformatics (CODEN ITCBCY, %%% ISSN 1545-5963 (print), 1557-9964 %%% (electronic)), covering all journal issues %%% from 2004 to date. %%% %%% At version 1.88, the COMPLETE journal %%% coverage looked like this: %%% %%% 2004 ( 23) 2011 ( 155) 2018 ( 189) %%% 2005 ( 37) 2012 ( 179) 2019 ( 199) %%% 2006 ( 41) 2013 ( 144) 2020 ( 150) %%% 2007 ( 69) 2014 ( 118) 2021 ( 180) %%% 2008 ( 59) 2015 ( 149) 2022 ( 180) %%% 2009 ( 69) 2016 ( 110) 2023 ( 172) %%% 2010 ( 75) 2017 ( 144) 2024 ( 135) %%% %%% Article: 2577 %%% %%% Total entries: 2577 %%% %%% The journal Web pages can be found at: %%% %%% http://www.acm.org/pubs/tcbb/ %%% http://portal.acm.org/browse_dl.cfm?idx=J954 %%% %%% Qualified subscribers can retrieve the full %%% text of recent articles in PDF form. %%% %%% The initial draft was extracted from the ACM %%% Web pages. %%% %%% ACM copyrights explicitly permit abstracting %%% with credit, so article abstracts, keywords, %%% and subject classifications have been %%% included in this bibliography wherever %%% available. 
Article reviews have been %%% omitted, until their copyright status has %%% been clarified. %%% %%% bibsource keys in the bibliography entries %%% below indicate the entry originally came %%% from the computer science bibliography %%% archive, even though it has likely since %%% been corrected and updated. %%% %%% URL keys in the bibliography point to %%% World Wide Web locations of additional %%% information about the entry. %%% %%% BibTeX citation tags are uniformly chosen %%% as name:year:abbrev, where name is the %%% family name of the first author or editor, %%% year is a 4-digit number, and abbrev is a %%% 3-letter condensation of important title %%% words. Citation tags were automatically %%% generated by software developed for the %%% BibNet Project. %%% %%% In this bibliography, entries are sorted in %%% publication order, using ``bibsort -byvolume.'' %%% %%% The checksum field above contains a CRC-16 %%% checksum as the first value, followed by the %%% equivalent of the standard UNIX wc (word %%% count) utility output of lines, words, and %%% characters. This is produced by Robert %%% Solovay's checksum utility." %%% } %%% ==================================================================== @Preamble{ "\input bibnames.sty" # "\hyphenation{Christ-o-dou-lou Dan-iel-la Dough-er-ty Giu-sep-pe Hab-tom Le-o-nar-do Ma-ran-go-ni Mee-nak-shi Pav-lo-vic Pro-ko-pen-ko Rez-ar-ta Ri-bei-ro Sid-da-ha-na-val-li Tei-xei-ra Ven-kat-es-wa-ran}" # "\ifx \undefined \bioname \def \bioname#1{{{\em #1\/}}} \fi" # "\ifx \undefined \poly \def \poly {{\rm poly}}\fi" # "\ifx \undefined \TM \def \TM {${}^{\sc TM}$} \fi" } %%% ==================================================================== %%% Acknowledgement abbreviations: @String{ack-nhfb = "Nelson H. F. 
Beebe, University of Utah, Department of Mathematics, 110 LCB, 155 S 1400 E RM 233, Salt Lake City, UT 84112-0090, USA, Tel: +1 801 581 5254, e-mail: \path|beebe@math.utah.edu|, \path|beebe@acm.org|, \path|beebe@computer.org| (Internet), URL: \path|https://www.math.utah.edu/~beebe/|"} %%% ==================================================================== %%% Journal abbreviations: @String{j-TCBB = "IEEE\slash ACM Transactions on Computational Biology and Bioinformatics"} %%% ==================================================================== %%% Bibliography entries: @Article{Williams:2004:WM, author = "Michael R. Williams", title = "Welcome Message", journal = j-TCBB, volume = "1", number = "1", pages = "1--1", month = jan, year = "2004", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Nov 22 06:42:56 MST 2004", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Gusfield:2004:IIA, author = "Dan Gusfield", title = "Introduction to the {IEEE\slash ACM Transactions on Computational Biology and Bioinformatics}", journal = j-TCBB, volume = "1", number = "1", pages = "2--3", month = jan, year = "2004", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Nov 22 06:42:56 MST 2004", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Williams:2004:INA, author = "Michael R. 
Williams", title = "Introduction of New {Associate Editors}", journal = j-TCBB, volume = "1", number = "1", pages = "4--12", month = jan, year = "2004", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Nov 22 06:42:56 MST 2004", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Moret:2004:PNM, author = "Bernard M. E. Moret and Luay Nakhleh and Tandy Warnow and C. Randal Linder and Anna Tholse and Anneke Padolina and Jerry Sun and Ruth Timme", title = "Phylogenetic Networks: Modeling, Reconstructibility, and Accuracy", journal = j-TCBB, volume = "1", number = "1", pages = "13--23", month = jan, year = "2004", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Nov 22 06:42:56 MST 2004", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Madeira:2004:BAB, author = "Sara C. Madeira and Arlindo L. Oliveira", title = "Biclustering Algorithms for Biological Data Analysis: a Survey", journal = j-TCBB, volume = "1", number = "1", pages = "24--45", month = jan, year = "2004", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Nov 22 06:42:56 MST 2004", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Preparata:2004:SHR, author = "Franco P. 
Preparata", title = "Sequencing-by-Hybridization Revisited: The Analog-Spectrum Proposal", journal = j-TCBB, volume = "1", number = "1", pages = "46--52", month = jan, year = "2004", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Nov 22 06:42:56 MST 2004", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Hochsmann:2004:PMR, author = "Matthias H{\"o}chsmann and Bj{\"o}rn Voss and Robert Giegerich", title = "Pure Multiple {RNA} Secondary Structure Alignments: a Progressive Profile Approach", journal = j-TCBB, volume = "1", number = "1", pages = "53--62", month = jan, year = "2004", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Nov 22 06:42:56 MST 2004", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Anonymous:2004:INA, author = "Anonymous", title = "Introduction of New {Associate Editor}", journal = j-TCBB, volume = "1", number = "2", pages = "65--65", month = apr, year = "2004", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Nov 22 06:42:56 MST 2004", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Witwer:2004:PCR, author = "Christina Witwer and Ivo L. Hofacker and Peter F. 
Stadler", title = "Prediction of Consensus {RNA} Secondary Structures Including Pseudoknots", journal = j-TCBB, volume = "1", number = "2", pages = "66--77", month = apr, year = "2004", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Nov 22 06:42:56 MST 2004", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Bafna:2004:NRE, author = "Vineet Bafna and Vikas Bansal", title = "The Number of Recombination Events in a Sample History: Conflict Graph and Lower Bounds", journal = j-TCBB, volume = "1", number = "2", pages = "78--90", month = apr, year = "2004", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Nov 22 06:42:56 MST 2004", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Raphael:2004:UPM, author = "Benjamin Raphael and Lung-Tien Liu and George Varghese", title = "A Uniform Projection Method for Motif Discovery in {DNA} Sequences", journal = j-TCBB, volume = "1", number = "2", pages = "91--94", month = apr, year = "2004", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Nov 22 06:42:56 MST 2004", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Gusfield:2004:INA, author = "Dan Gusfield", title = "Introduction of New {Associate Editors}", journal = 
j-TCBB, volume = "1", number = "3", pages = "97--97", month = jul, year = "2004", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jan 24 14:15:55 MST 2005", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Scheid:2004:SDS, author = "Stefanie Scheid and Rainer Spang", title = "A Stochastic Downhill Search Algorithm for Estimating the Local False Discovery Rate", journal = j-TCBB, volume = "1", number = "3", pages = "98--108", month = jul, year = "2004", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jan 24 14:15:55 MST 2005", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Dress:2004:CSG, author = "Andreas W. M. Dress and Daniel H. Huson", title = "Constructing Splits Graphs", journal = j-TCBB, volume = "1", number = "3", pages = "109--115", month = jul, year = "2004", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jan 24 14:15:55 MST 2005", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Cameron:2004:IGA, author = "Michael Cameron and Hugh E. 
Williams and Adam Cannane",
  title =        "Improved Gapped Alignment in {BLAST}",
  journal =      j-TCBB,
  volume =       "1",
  number =       "3",
  pages =        "116--129",
  month =        jul,
  year =         "2004",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Mon Jan 24 14:15:55 MST 2005",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Evans:2004:UDT,
  author =       "Steven N. Evans and Tandy Warnow",
  title =        "Unidentifiable Divergence Times in Rates-across-Sites Models",
  journal =      j-TCBB,
  volume =       "1",
  number =       "3",
  pages =        "130--134",
  month =        jul,
  year =         "2004",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Mon Jan 24 14:15:55 MST 2005",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Kim:2004:GEW,
  author =       "Junhyong Kim and Inge Jonassen",
  title =        "Guest Editorial: {WABI} Special Section Part 1",
  journal =      j-TCBB,
  volume =       "1",
  number =       "4",
  pages =        "137--138",
  month =        oct,
  year =         "2004",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Mon Jan 24 14:15:55 MST 2005",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

%%% NOTE(review): author diacritics restored in the next entry (was
%%% "Miklos Csuros"); the citation key is deliberately unchanged.
%%% Confirm the spelling against the published article.
@Article{Csuros:2004:MSS,
  author =       "Mikl{\'o}s Cs{\H{u}}r{\"o}s",
  title =        "Maximum-Scoring Segment Sets",
  journal =      j-TCBB,
  volume =       "1",
  number =       "4",
  pages =        "139--150",
  month =        oct,
  year =         "2004",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Mon Jan 24 14:15:55 MST 2005",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

%%% NOTE(review): author diacritics restored in the next entry (was
%%% "Tobias Klopper"). Confirm against the published article.
@Article{Huson:2004:PSN,
  author =       "Daniel H. Huson and Tobias Dezulian and Tobias Kl{\"o}pper and Mike A. Steel",
  title =        "Phylogenetic Super-Networks from Partial Trees",
  journal =      j-TCBB,
  volume =       "1",
  number =       "4",
  pages =        "151--158",
  month =        oct,
  year =         "2004",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Mon Jan 24 14:15:55 MST 2005",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

%%% NOTE(review): author diacritics restored in the next entry (was
%%% "Heikki Hyyro"). Confirm against the published article.
@Article{Bannai:2004:ADO,
  author =       "Hideo Bannai and Heikki Hyyr{\"o} and Ayumi Shinohara and Masayuki Takeda and Kenta Nakai and Satoru Miyano",
  title =        "An {$ O(N^2) $} Algorithm for Discovering Optimal {Boolean} Pattern Pairs",
  journal =      j-TCBB,
  volume =       "1",
  number =       "4",
  pages =        "159--170",
  month =        oct,
  year =         "2004",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Mon Jan 24 14:15:55 MST 2005",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Gramm:2004:PTA,
  author =       "Jens Gramm",
  title =        "A Polynomial-Time Algorithm for the Matching of Crossing Contact-Map Patterns",
  journal =      j-TCBB,
  volume =       "1",
  number =       "4",
  pages =        "171--180",
  month =        oct,
  year =         "2004",
  CODEN =        "ITCBCY",
ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jan 24 14:15:55 MST 2005", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ye:2004:UUD, author = "Jieping Ye and Tao Li and Tao Xiong and Ravi Janardan", title = "Using Uncorrelated Discriminant Analysis for Tissue Classification with Gene Expression Data", journal = j-TCBB, volume = "1", number = "4", pages = "181--190", month = oct, year = "2004", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jan 24 14:15:55 MST 2005", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Anonymous:2004:AI, author = "Anonymous", title = "Annual Index", journal = j-TCBB, volume = "1", number = "4", pages = "191--192", month = oct, year = "2004", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jan 24 14:15:55 MST 2005", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Kim:2005:GEW, author = "Junhyong Kim and Inge Jonassen", title = "Guest Editorial: {WABI} Special Section. 
{Part II}", journal = j-TCBB, volume = "2", number = "1", pages = "1--2", month = jan, year = "2005", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Apr 12 07:11:54 MDT 2005", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Allali:2005:NDH, author = "Julien Allali and Marie-France Sagot", title = "A New Distance for High Level {RNA} Secondary Structure Comparison", journal = j-TCBB, volume = "2", number = "1", pages = "3--14", month = jan, year = "2005", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Apr 12 07:11:54 MDT 2005", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Bertrand:2005:TRL, author = "Denis Bertrand and Olivier Gascuel", title = "Topological Rearrangements and Local Search Method for Tandem Duplication Trees", journal = j-TCBB, volume = "2", number = "1", pages = "15--28", month = jan, year = "2005", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Apr 12 07:11:54 MDT 2005", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Brown:2005:OMS, author = "Daniel G. 
Brown",
  title =        "Optimizing Multiple Seeds for Protein Homology Search",
  journal =      j-TCBB,
  volume =       "2",
  number =       "1",
  pages =        "29--38",
  month =        jan,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Tue Apr 12 07:11:54 MDT 2005",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

%%% NOTE(review): the single hyphen in "Editorial-State" was replaced
%%% by the em-dash markup (---) that this file uses elsewhere for
%%% title dashes; presumably the dash was flattened on import.
%%% Confirm against the published title.
@Article{Gusfield:2005:EST,
  author =       "Dan Gusfield",
  title =        "Editorial---State of the Transaction",
  journal =      j-TCBB,
  volume =       "2",
  number =       "1",
  pages =        "39--39",
  month =        jan,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Tue Apr 12 07:11:54 MDT 2005",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Pisanti:2005:BMG,
  author =       "Nadia Pisanti and Maxime Crochemore and Roberto Grossi and Marie-France Sagot",
  title =        "Bases of Motifs for Generating Repeated Patterns with Wild Cards",
  journal =      j-TCBB,
  volume =       "2",
  number =       "1",
  pages =        "40--50",
  month =        jan,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Tue Apr 12 07:11:54 MDT 2005",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

%%% NOTE(review): author diacritics restored in the next entry (was
%%% "Laurent Noe"). Confirm against the published article.
@Article{Kucherov:2005:MLF,
  author =       "Gregory Kucherov and Laurent No{\'e} and Mikhail Roytberg",
  title =        "Multiseed Lossless Filtration",
  journal =      j-TCBB,
  volume =       "2",
  number =       "1",
pages = "51--61", month = jan, year = "2005", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Apr 12 07:11:54 MDT 2005", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Liu:2005:TMB, author = "Ying Liu and Shamkant B. Navathe and Jorge Civera and Venu Dasigi and Ashwin Ram and Brian J. Ciliax and Ray Dingledine", title = "Text Mining Biomedical Literature for Discovering Gene-to-Gene Relationships: a Comparative Study of Algorithms", journal = j-TCBB, volume = "2", number = "1", pages = "62--76", month = jan, year = "2005", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Apr 12 07:11:54 MDT 2005", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Staff:2005:RL, author = "{IEEE and ACM Transactions on Computational Biology and Bioinformatics staff}", title = "2004 Reviewers List", journal = j-TCBB, volume = "2", number = "1", pages = "77--77", month = jan, year = "2005", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Apr 12 07:11:54 MDT 2005", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ling:2005:GEIa, author = "Charles X. 
Ling and William Stafford Noble and Qiang Yang", title = "{Guest Editors}' Introduction to the {Special Issue: Machine Learning for Bioinformatics---Part 1}", journal = j-TCBB, volume = "2", number = "2", pages = "81--82", month = apr, year = "2005", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 22 17:33:35 MDT 2005", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Au:2005:ACG, author = "Wai-Ho Au and Keith C. C. Chan and Andrew K. C. Wong and Yang Wang", title = "Attribute Clustering for Grouping, Selection, and Classification of Gene Expression Data", journal = j-TCBB, volume = "2", number = "2", pages = "83--101", month = apr, year = "2005", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 22 17:33:35 MDT 2005", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Biyani:2005:JCP, author = "Pravesh Biyani and Xiaolin Wu and Abhijit Sinha", title = "Joint Classification and Pairing of Human Chromosomes", journal = j-TCBB, volume = "2", number = "2", pages = "102--109", month = apr, year = "2005", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 22 17:33:35 MDT 2005", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Furlanello:2005:SLM, 
author = "Cesare Furlanello and Maria Serafini and Stefano Merler and Giuseppe Jurman", title = "Semisupervised Learning for Molecular Profiling", journal = j-TCBB, volume = "2", number = "2", pages = "110--118", month = apr, year = "2005", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 22 17:33:35 MDT 2005", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Mamitsuka:2005:ELK, author = "Hiroshi Mamitsuka", title = "Essential Latent Knowledge for Protein-Protein Interactions: Analysis by an Unsupervised Learning Approach", journal = j-TCBB, volume = "2", number = "2", pages = "119--130", month = apr, year = "2005", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 22 17:33:35 MDT 2005", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Rajapakse:2005:MED, author = "Jagath C. 
Rajapakse and Loi Sy Ho", title = "{Markov} Encoding for Detecting Signals in Genomic Sequences", journal = j-TCBB, volume = "2", number = "2", pages = "131--142", month = apr, year = "2005", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 22 17:33:35 MDT 2005", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Rogers:2005:LPD, author = "Simon Rogers and Mark Girolami and Colin Campbell and Rainer Breitling", title = "The Latent Process Decomposition of {cDNA} Microarray Data Sets", journal = j-TCBB, volume = "2", number = "2", pages = "143--156", month = apr, year = "2005", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 22 17:33:35 MDT 2005", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Xu:2005:FRP, author = "Jinbo Xu", title = "Fold Recognition by Predicted Alignment Accuracy", journal = j-TCBB, volume = "2", number = "2", pages = "157--165", month = apr, year = "2005", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 22 17:33:35 MDT 2005", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Shen:2005:DRB, author = "Li Shen and Eng Chong Tan", title = "Dimension Reduction-Based Penalized Logistic Regression for Cancer Classification 
Using Microarray Data",
  journal =      j-TCBB,
  volume =       "2",
  number =       "2",
  pages =        "166--175",
  month =        apr,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Jun 22 17:33:35 MDT 2005",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

%%% Part 2 of the guest editorial whose Part 1 is Ling:2005:GEIa.
%%% Author names are given in the same full form as in Part 1, and
%%% the possessive is plural because three guest editors are listed.
%%% NOTE(review): confirm the plural possessive against the
%%% published title.
@Article{Ling:2005:GEIb,
  author =       "Charles X. Ling and William Stafford Noble and Qiang Yang",
  title =        "{Guest Editors}' Introduction to the {Special Issue: Machine Learning for Bioinformatics---Part 2}",
  journal =      j-TCBB,
  volume =       "2",
  number =       "3",
  pages =        "177--178",
  month =        jul,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Tue Sep 20 06:11:25 MDT 2005",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

%%% NOTE(review): author diacritics restored in the next entry (was
%%% "Alexander Schonhuth"). Confirm against the published article.
@Article{Schliep:2005:AGE,
  author =       "Alexander Schliep and Ivan G. Costa and Christine Steinhoff and Alexander Sch{\"o}nhuth",
  title =        "Analyzing Gene Expression Time-Courses",
  journal =      j-TCBB,
  volume =       "2",
  number =       "3",
  pages =        "179--193",
  month =        jul,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Tue Sep 20 06:11:25 MDT 2005",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Kundaje:2005:CST,
  author =       "Anshul Kundaje and Manuel Middendorf and Feng Gao and Chris Wiggins and Christina Leslie",
  title =        "Combining Sequence and Time Series Expression Data to Learn Transcriptional Modules",
  journal =      j-TCBB,
  volume =       "2",
  number =       "3",
  pages =        "194--202",
  month =        jul,
  year =         "2005",
  CODEN =        "ITCBCY",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Tue Sep 20 06:11:25 MDT 2005",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

%%% NOTE(review): author diacritics restored in the next entry (was
%%% "Janne Nikkila"). Confirm against the published article.
@Article{Kaski:2005:ACE,
  author =       "Samuel Kaski and Janne Nikkil{\"a} and Janne Sinkkonen and Leo Lahti and Juha E. A.
Knuuttila and Christophe Roos",
  title = "Associative Clustering for Exploring Dependencies between Functional Genomics Data Sets",
  journal = j-TCBB,
  volume = "2",
  number = "3",
  pages = "203--216",
  month = jul,
  year = "2005",
  CODEN = "ITCBCY",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Tue Sep 20 06:11:25 MDT 2005",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Zhang:2005:PMF,
  author = "Jingfen Zhang and Wen Gao and Jinjin Cai and Simin He and Rong Zeng and Runsheng Chen",
  title = "Predicting Molecular Formulas of Fragment Ions with Isotope Patterns in Tandem Mass Spectra",
  journal = j-TCBB,
  volume = "2",
  number = "3",
  pages = "217--230",
  month = jul,
  year = "2005",
  CODEN = "ITCBCY",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Tue Sep 20 06:11:25 MDT 2005",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Keedwell:2005:DGN,
  author = "Edward Keedwell and Ajit Narayanan",
  title = "Discovering Gene Networks with a Neural-Genetic Hybrid",
  journal = j-TCBB,
  volume = "2",
  number = "3",
  pages = "231--242",
  month = jul,
  year = "2005",
  CODEN = "ITCBCY",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Tue Sep 20 06:11:25 MDT 2005",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Hawkins:2005:ARN,
  author = "John
Hawkins and Mikael Bod{\'e}n",
  title = "The Applicability of Recurrent Neural Networks for Biological Sequence Analysis",
  journal = j-TCBB,
  volume = "2",
  number = "3",
  pages = "243--253",
  month = jul,
  year = "2005",
  CODEN = "ITCBCY",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Tue Sep 20 06:11:25 MDT 2005",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Gustafsson:2005:CAL,
  author = "Mika Gustafsson and Michael H{\"o}rnquist and Anna Lombardi",
  title = "Constructing and Analyzing a Large-Scale Gene-to-Gene Regulatory Network---Lasso-Constrained Inference and Biological Validation",
  journal = j-TCBB,
  volume = "2",
  number = "3",
  pages = "254--261",
  month = jul,
  year = "2005",
  CODEN = "ITCBCY",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Tue Sep 20 06:11:25 MDT 2005",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Demir:2005:LTP,
  author = "{\c{C}}i{\u{g}}dem Demir and S.
Humayun Gultekin and B{\"u}lent Yener",
  title = "Learning the Topological Properties of Brain Tumors",
  journal = j-TCBB,
  volume = "2",
  number = "3",
  pages = "262--270",
  month = jul,
  year = "2005",
  CODEN = "ITCBCY",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Tue Sep 20 06:11:25 MDT 2005",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Anonymous:2005:CPS,
  author = "Anonymous",
  title = "Call for Papers for {Special Issue on Computational Intelligence Approaches in Computational Biology and Bioinformatics}",
  journal = j-TCBB,
  volume = "2",
  number = "3",
  pages = "271--271",
  month = jul,
  year = "2005",
  CODEN = "ITCBCY",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Tue Sep 20 06:11:25 MDT 2005",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Cickovski:2005:FTD,
  author = "Trevor M. Cickovski and Chengbang Huang and Rajiv Chaturvedi and Tilmann Glimm and H. George E. Hentschel and Mark S. Alber and James A. Glazier and Stuart A. Newman and Jes{\'u}s A.
Izaguirre",
  title = "A Framework for Three-Dimensional Simulation of Morphogenesis",
  journal = j-TCBB,
  volume = "2",
  number = "4",
  pages = "273--288",
  month = oct,
  year = "2005",
  CODEN = "ITCBCY",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Fri Nov 18 05:22:15 MST 2005",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Boscolo:2005:GFN,
  author = "Riccardo Boscolo and Chiara Sabatti and James C. Liao and Vwani P. Roychowdhury",
  title = "A Generalized Framework for Network Component Analysis",
  journal = j-TCBB,
  volume = "2",
  number = "4",
  pages = "289--301",
  month = oct,
  year = "2005",
  CODEN = "ITCBCY",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Fri Nov 18 05:22:15 MST 2005",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Chen:2005:AOG,
  author = "Xin Chen and Jie Zheng and Zheng Fu and Peng Nan and Yang Zhong and Stefano Lonardi and Tao Jiang",
  title = "Assignment of Orthologous Genes via Genome Rearrangement",
  journal = j-TCBB,
  volume = "2",
  number = "4",
  pages = "302--315",
  month = oct,
  year = "2005",
  CODEN = "ITCBCY",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Fri Nov 18 05:22:15 MST 2005",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Turner:2005:BMS,
  author = "Heather L. Turner and Trevor C.
Bailey and Wojtek J. Krzanowski and Cheryl A. Hemingway",
  title = "Biclustering Models for Structured Microarray Data",
  journal = j-TCBB,
  volume = "2",
  number = "4",
  pages = "316--329",
  month = oct,
  year = "2005",
  CODEN = "ITCBCY",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Fri Nov 18 05:22:15 MST 2005",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Sevilla:2005:CBG,
  author = "Jose L. Sevilla and Victor Segura and Adam Podhorski and Elizabeth Guruceaga and Jose M. Mato and Luis A. Martinez-Cruz and Fernando J. Corrales and Angel Rubio",
  title = "Correlation between Gene Expression and {GO} Semantic Similarity",
  journal = j-TCBB,
  volume = "2",
  number = "4",
  pages = "330--338",
  month = oct,
  year = "2005",
  CODEN = "ITCBCY",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Fri Nov 18 05:22:15 MST 2005",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Yoon:2005:DCB,
  author = "Sungroh Yoon and Christine Nardini and Luca Benini and Giovanni De Micheli",
  title = "Discovering Coherent Biclusters from Gene Expression Data Using Zero-Suppressed Binary Decision Diagrams",
  journal = j-TCBB,
  volume = "2",
  number = "4",
  pages = "339--354",
  month = oct,
  year = "2005",
  CODEN = "ITCBCY",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Fri Nov 18 05:22:15 MST 2005",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and
Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Tseng:2005:EMG,
  author = "Vincent S. Tseng and Ching-Pin Kao",
  title = "Efficiently Mining Gene Expression Data via a Novel Parameterless Clustering Method",
  journal = j-TCBB,
  volume = "2",
  number = "4",
  pages = "355--365",
  month = oct,
  year = "2005",
  CODEN = "ITCBCY",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Fri Nov 18 05:22:15 MST 2005",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Zhang:2005:SGN,
  author = "Shaojie Zhang and Brian Haas and Eleazar Eskin and Vineet Bafna",
  title = "Searching Genomes for Noncoding {RNA} Using {FastR}",
  journal = j-TCBB,
  volume = "2",
  number = "4",
  pages = "366--379",
  month = oct,
  year = "2005",
  CODEN = "ITCBCY",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Fri Nov 18 05:22:15 MST 2005",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Anonymous:2005:AI,
  author = "Anonymous",
  title = "2005 Annual Index",
  journal = j-TCBB,
  volume = "2",
  number = "4",
  pages = "380--384",
  month = oct,
  year = "2005",
  CODEN = "ITCBCY",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Fri Nov 18 05:22:15 MST 2005",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Gusfield:2006:SJ,
  author = "Dan
Gusfield",
  title = "State of the Journal",
  journal = j-TCBB,
  volume = "3",
  number = "1",
  pages = "1--1",
  month = jan,
  year = "2006",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2006.12",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Thu Feb 16 11:06:15 MST 2006",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Berger:2006:JAG,
  author = "John A. Berger and Sampsa Hautaniemi and Sanjit K. Mitra and Jaakko Astola",
  title = "Jointly Analyzing Gene Expression and Copy Number Data in Breast Cancer Using Data Reduction Models",
  journal = j-TCBB,
  volume = "3",
  number = "1",
  pages = "2--16",
  month = jan,
  year = "2006",
  CODEN = "ITCBCY",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Thu Feb 16 11:06:15 MST 2006",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Sebastian:2006:STA,
  author = "Rafael Sebastian and Maria-Elena Diaz and Guillermo Ayala and Kresimir Letinic and Jose Moncho-Bogani and Derek Toomre",
  title = "Spatio-Temporal Analysis of Constitutive Exocytosis in Epithelial Cells",
  journal = j-TCBB,
  volume = "3",
  number = "1",
  pages = "17--32",
  month = jan,
  year = "2006",
  CODEN = "ITCBCY",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Thu Feb 16 11:06:15 MST 2006",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Hershkovitz:2006:SAR,
  author = "Eli Hershkovitz and Guillermo Sapiro and Allen Tannenbaum and Loren Dean Williams",
  title = "Statistical Analysis of {RNA} Backbone",
  journal = j-TCBB,
  volume = "3",
  number = "1",
  pages = "33--46",
  month = jan,
  year = "2006",
  CODEN = "ITCBCY",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Thu Feb 16 11:06:15 MST 2006",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Dawy:2006:GMM,
  author = "Zaher Dawy and Bernhard Goebel and Joachim Hagenauer and Christophe Andreoli and Thomas Meitinger and Jakob C. Mueller",
  title = "Gene Mapping and Marker Clustering Using {Shannon}'s Mutual Information",
  journal = j-TCBB,
  volume = "3",
  number = "1",
  pages = "47--56",
  month = jan,
  year = "2006",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2006.9",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Fri Jun 7 15:19:59 MDT 2013",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/bibnet/authors/s/shannon-claude-elwood.bib; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Goutsias:2006:HMM,
  author = "John Goutsias",
  title = "A Hidden {Markov} Model for Transcriptional Regulation in Single Cells",
  journal = j-TCBB,
  volume = "3",
  number = "1",
  pages = "57--71",
  month = jan,
  year = "2006",
  CODEN = "ITCBCY",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Thu Feb 16 11:06:15 MST 2006",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Rueda:2006:HCA,
  author = "Luis Rueda and Vidya Vidyadharan",
  title = "A Hill-Climbing Approach for Automatic Gridding of {cDNA} Microarray Images",
  journal = j-TCBB,
  volume = "3",
  number = "1",
  pages = "72--83",
  month = jan,
  year = "2006",
  CODEN = "ITCBCY",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Thu Feb 16 11:06:15 MST 2006",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Semple:2006:UNC,
  author = "Charles Semple and Mike Steel",
  title = "Unicyclic Networks: Compatibility and Enumeration",
  journal = j-TCBB,
  volume = "3",
  number = "1",
  pages = "84--91",
  month = jan,
  year = "2006",
  CODEN = "ITCBCY",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Thu Feb 16 11:06:15 MST 2006",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Roch:2006:SPP,
  author = "S{\'e}bastien Roch",
  title = "A Short Proof that Phylogenetic Tree Reconstruction by Maximum Likelihood Is Hard",
  journal = j-TCBB,
  volume = "3",
  number = "1",
  pages = "92--94",
  month = jan,
  year = "2006",
  CODEN = "ITCBCY",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Thu Feb 16 11:06:15 MST 2006",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Anonymous:2006:RL,
  author = "Anonymous",
  title = "2005 Reviewers List",
  journal = j-TCBB,
  volume = "3",
  number = "1",
  pages = "95--96",
  month = jan,
  year = "2006",
  CODEN = "ITCBCY",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Thu Feb 16 11:06:15 MST 2006",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Gusfield:2006:INA,
  author = "Dan Gusfield",
  title = "Introduction of New {Associate Editors}",
  journal = j-TCBB,
  volume = "3",
  number = "2",
  pages = "97--97",
  month = apr,
  year = "2006",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2006.25",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Wed Jun 7 06:38:18 MDT 2006",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Chu:2006:BSM,
  author = "Wei Chu and Zoubin Ghahramani and Alexei Podtelezhnikov and David L.
Wild",
  title = "{Bayesian} Segmental Models with Multiple Sequence Alignment Profiles for Protein Secondary Structure and Contact Map Prediction",
  journal = j-TCBB,
  volume = "3",
  number = "2",
  pages = "98--113",
  month = apr,
  year = "2006",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2006.17",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Wed Jun 7 06:38:18 MDT 2006",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Danziger:2006:FCM,
  author = "Samuel A. Danziger and S. Joshua Swamidass and Jue Zeng and Lawrence R. Dearth and Qiang Lu and Jonathan H. Chen and Jianlin Cheng and Vinh P. Hoang and Hiroto Saigo and Ray Luo and Pierre Baldi and Rainer K. Brachmann and Richard H. Lathrop",
  title = "Functional Census of Mutation Sequence Spaces: The Example of p53 Cancer Rescue Mutants",
  journal = j-TCBB,
  volume = "3",
  number = "2",
  pages = "114--125",
  month = apr,
  year = "2006",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2006.22",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Wed Jun 7 06:38:18 MDT 2006",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Carvalho:2006:EAI,
  author = "Alexandra M. Carvalho and Ana T. Freitas and Arlindo L.
Oliveira and Marie-France Sagot",
  title = "An Efficient Algorithm for the Identification of Structured Motifs in {DNA} Promoter Sequences",
  journal = j-TCBB,
  volume = "3",
  number = "2",
  pages = "126--140",
  month = apr,
  year = "2006",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2006.16",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Wed Jun 7 06:38:18 MDT 2006",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Brown:2006:IPA,
  author = "Daniel G. Brown and Ian M. Harrower",
  title = "Integer Programming Approaches to Haplotype Inference by Pure Parsimony",
  journal = j-TCBB,
  volume = "3",
  number = "2",
  pages = "141--154",
  month = apr,
  year = "2006",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2006.24",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Wed Jun 7 06:38:18 MDT 2006",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Vass:2006:JMB,
  author = "Marc T. Vass and Clifford A. Shaffer and Naren Ramakrishnan and Layne T. Watson and John J.
Tyson",
  title = "The {JigCell} Model Builder: a Spreadsheet Interface for Creating Biochemical Reaction Network Models",
  journal = j-TCBB,
  volume = "3",
  number = "2",
  pages = "155--164",
  month = apr,
  year = "2006",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2006.27",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Wed Jun 7 06:38:18 MDT 2006",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Chen:2006:MFS,
  author = "Duhong Chen and Oliver Eulenstein and David Fern{\'a}ndez-Baca and Michael Sanderson",
  title = "Minimum-Flip Supertrees: Complexity and Algorithms",
  journal = j-TCBB,
  volume = "3",
  number = "2",
  pages = "165--173",
  month = apr,
  year = "2006",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2006.26",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Wed Jun 7 06:38:18 MDT 2006",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Sevon:2006:TTP,
  author = "Petteri Sevon and Hannu Toivonen and Vesa Ollikainen",
  title = "{TreeDT}: Tree Pattern Mining for Gene Mapping",
  journal = j-TCBB,
  volume = "3",
  number = "2",
  pages = "174--185",
  month = apr,
  year = "2006",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2006.28",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Wed Jun 7 06:38:18 MDT 2006",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Song:2006:CNS,
  author = "Yun S. Song",
  title = "A Concise Necessary and Sufficient Condition for the Existence of a Galled-Tree",
  journal = j-TCBB,
  volume = "3",
  number = "2",
  pages = "186--191",
  month = apr,
  year = "2006",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2006.15",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Wed Jun 7 06:38:18 MDT 2006",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract = "Galled-trees are a special class of graphical representation of evolutionary history that has proven amenable to efficient, polynomial-time algorithms. The goal of this paper is to construct a concise necessary and sufficient condition for the existence of a galled-tree for $M$, a set of binary sequences that purportedly have evolved in the presence of recombination. Both root-known and root-unknown cases are considered here.",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Daras:2006:TDS,
  author = "Petros Daras and Dimitrios Zarpalas and Apostolos Axenopoulos and Dimitrios Tzovaras and Michael Gerassimos Strintzis",
  title = "Three-Dimensional Shape-Structure Comparison Method for Protein Classification",
  journal = j-TCBB,
  volume = "3",
  number = "3",
  pages = "193--207",
  month = jul,
  year = "2006",
  CODEN = "ITCBCY",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Mon Sep 11 07:36:29 MDT 2006",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Yu:2006:MPA,
  author = "Weichuan Yu and Xiaoye Li and Junfeng Liu and Baolin Wu and Kenneth R.
Williams and Hongyu Zhao",
  title = "Multiple Peak Alignment in Sequential Data Analysis: a Scale-Space-Based Approach",
  journal = j-TCBB,
  volume = "3",
  number = "3",
  pages = "208--219",
  month = jul,
  year = "2006",
  CODEN = "ITCBCY",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Mon Sep 11 07:36:29 MDT 2006",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Abul:2006:PAE,
  author = "Osman Abul and Reda Alhajj and Faruk Polat",
  title = "A Powerful Approach for Effective Finding of Significantly Differentially Expressed Genes",
  journal = j-TCBB,
  volume = "3",
  number = "3",
  pages = "220--231",
  month = jul,
  year = "2006",
  CODEN = "ITCBCY",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Mon Sep 11 07:36:29 MDT 2006",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Nagarajan:2006:CSC,
  author = "Radhakrishnan Nagarajan and Meenakshi Upreti",
  title = "Correlation Statistics for {cDNA} Microarray Image Analysis",
  journal = j-TCBB,
  volume = "3",
  number = "3",
  pages = "232--238",
  month = jul,
  year = "2006",
  CODEN = "ITCBCY",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Mon Sep 11 07:36:29 MDT 2006",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Song:2006:CAP,
  author = "Yun S.
Song and Rune Lyngs{\o} and Jotun Hein",
  title = "Counting All Possible Ancestral Configurations of Sample Sequences in Population Genetics",
  journal = j-TCBB,
  volume = "3",
  number = "3",
  pages = "239--251",
  month = jul,
  year = "2006",
  CODEN = "ITCBCY",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Mon Sep 11 07:36:29 MDT 2006",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Pirinen:2006:FCG,
  author = "Matti Pirinen and Dario Gasbarra",
  title = "Finding Consistent Gene Transmission Patterns on Large and Complex Pedigrees",
  journal = j-TCBB,
  volume = "3",
  number = "3",
  pages = "252--262",
  month = jul,
  year = "2006",
  CODEN = "ITCBCY",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Mon Sep 11 07:36:29 MDT 2006",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Popescu:2006:FMG,
  author = "Mihail Popescu and James M. Keller and Joyce A.
Mitchell",
  title = "Fuzzy Measures on the Gene Ontology for Gene Product Similarity",
  journal = j-TCBB,
  volume = "3",
  number = "3",
  pages = "263--274",
  month = jul,
  year = "2006",
  CODEN = "ITCBCY",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Mon Sep 11 07:36:29 MDT 2006",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Bernt:2006:GRB,
  author = "Matthias Bernt and Daniel Merkle and Martin Middendorf",
  title = "Genome Rearrangement Based on Reversals that Preserve Conserved Intervals",
  journal = j-TCBB,
  volume = "3",
  number = "3",
  pages = "275--288",
  month = jul,
  year = "2006",
  CODEN = "ITCBCY",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Mon Sep 11 07:36:29 MDT 2006",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Berry:2006:IPC,
  author = "Vincent Berry and Fran{\c{c}}ois Nicolas",
  title = "Improved Parameterized Complexity of the Maximum Agreement Subtree and Maximum Compatible Tree Problems",
  journal = j-TCBB,
  volume = "3",
  number = "3",
  pages = "289--302",
  month = jul,
  year = "2006",
  CODEN = "ITCBCY",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Mon Sep 11 07:36:29 MDT 2006",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Sharan:2006:ITP,
  author = "Roded Sharan and Bjarni V.
Halld{\'o}rsson and Sorin Istrail",
  title = "Islands of Tractability for Parsimony Haplotyping",
  journal = j-TCBB,
  volume = "3",
  number = "3",
  pages = "303--311",
  month = jul,
  year = "2006",
  CODEN = "ITCBCY",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Mon Sep 11 07:36:29 MDT 2006",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Zhang:2006:SGR,
  author = "Chaolin Zhang and Xuesong Lu and Xuegong Zhang",
  title = "Significance of Gene Ranking for Classification of Microarray Samples",
  journal = j-TCBB,
  volume = "3",
  number = "3",
  pages = "312--320",
  month = jul,
  year = "2006",
  CODEN = "ITCBCY",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Mon Sep 11 07:36:29 MDT 2006",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Casadio:2006:GEI,
  author = "Rita Casadio",
  title = "{Guest Editor}'s Introduction to the Special Issue on Computational Biology and Bioinformatics -- Part 1",
  journal = j-TCBB,
  volume = "3",
  number = "4",
  pages = "321--322",
  month = oct,
  year = "2006",
  CODEN = "ITCBCY",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Thu Nov 30 19:05:58 MST 2006",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Snir:2006:UMC,
  author = "Sagi Snir and Satish Rao",
  title = "Using Max Cut to Enhance Rooted Trees
Consistency", journal = j-TCBB, volume = "3", number = "4", pages = "323--333", month = oct, year = "2006", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 30 19:05:58 MST 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ganapathy:2006:PIB, author = "Ganeshkumar Ganapathy and Barbara Goodson and Robert Jansen and Hai-son Le and Vijaya Ramachandran and Tandy Warnow", title = "Pattern Identification in Biogeography", journal = j-TCBB, volume = "3", number = "4", pages = "334--346", month = oct, year = "2006", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 30 19:05:58 MST 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wernicke:2006:EDN, author = "Sebastian Wernicke", title = "Efficient Detection of Network Motifs", journal = j-TCBB, volume = "3", number = "4", pages = "347--359", month = oct, year = "2006", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 30 19:05:58 MST 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Lacroix:2006:MSG, author = "Vincent Lacroix and Cristina G. 
Fernandes and Marie-France Sagot", title = "Motif Search in Graphs: Application to Metabolic Networks", journal = j-TCBB, volume = "3", number = "4", pages = "360--368", month = oct, year = "2006", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 30 19:05:58 MST 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Elias:2006:AAS, author = "Isaac Elias and Tzvika Hartman", title = "A $ 1.375 $-Approximation Algorithm for Sorting by Transpositions", journal = j-TCBB, volume = "3", number = "4", pages = "369--379", month = oct, year = "2006", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 30 19:05:58 MST 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Labarre:2006:NBT, author = "Anthony Labarre", title = "New Bounds and Tractable Instances for the Transposition Distance", journal = j-TCBB, volume = "3", number = "4", pages = "380--394", month = oct, year = "2006", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 30 19:05:58 MST 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Sammeth:2006:CTR, author = "Michael Sammeth and Jens Stoye", title = "Comparing Tandem Repeats with Duplications and Excisions of Variable Degree", 
journal = j-TCBB, volume = "3", number = "4", pages = "395--407", month = oct, year = "2006", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 30 19:05:58 MST 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Bilu:2006:FAO, author = "Yonatan Bilu and Pankaj K. Agarwal and Rachel Kolodny", title = "Faster Algorithms for Optimal Multiple Sequence Alignment Based on Pairwise Comparisons", journal = j-TCBB, volume = "3", number = "4", pages = "408--422", month = oct, year = "2006", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 30 19:05:58 MST 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Song:2006:EPA, author = "Yinglei Song and Chunmei Liu and Xiuzhen Huang and Russell L. 
Malmberg and Ying Xu and Liming Cai", title = "Efficient Parameterized Algorithms for Biopolymer Structure-Sequence Alignment", journal = j-TCBB, volume = "3", number = "4", pages = "423--432", month = oct, year = "2006", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 30 19:05:58 MST 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Anonymous:2006:AI, author = "Anonymous", title = "Annual Index", journal = j-TCBB, volume = "3", number = "4", pages = "??--??", month = oct, year = "2006", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 30 19:05:58 MST 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Gusfield:2007:SJ, author = "Dan Gusfield", title = "State of the {Journal}", journal = j-TCBB, volume = "4", number = "1", pages = "1--1", month = jan, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:57:20 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Gusfield:2007:AEAa, author = "Dan Gusfield", title = "{Associate Editor} Appreciation and Welcome", journal = j-TCBB, volume = "4", number = "1", pages = "2--2", month = jan, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 
(electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:57:20 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Casadio:2007:GEI, author = "Rita Casadio", title = "{Guest Editor}'s Introduction to the {Special Section on Computational Biology and Bioinformatics (WABI)} -- Part 2", journal = j-TCBB, volume = "4", number = "1", pages = "3--3", month = jan, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:57:20 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Berard:2007:PSR, author = "Severine B{\'e}rard and Anne Bergeron and Cedric Chauve and Christophe Paul", title = "Perfect Sorting by Reversals Is Not Always Difficult", journal = j-TCBB, volume = "4", number = "1", pages = "4--16", month = jan, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:57:20 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We propose new algorithms for computing pairwise rearrangement scenarios that conserve the combinatorial structure of genomes. More precisely, we investigate the problem of sorting signed permutations by reversals without breaking common intervals. We describe a combinatorial framework for this problem that allows us to characterize classes of signed permutations for which one can compute, in polynomial time, a shortest reversal scenario that conserves all common intervals. 
In particular, we define a class of permutations for which this computation can be done in linear time with a very simple algorithm that does not rely on the classical Hannenhalli-Pevzner theory for sorting by reversals. We apply these methods to the computation of rearrangement scenarios between permutations obtained from 16 synteny blocks of the X chromosomes of the human, mouse, and rat.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "common intervals; evolution scenarios; reversals", } @Article{Vashist:2007:OCM, author = "Akshay Vashist and Casimir A. Kulikowski and Ilya Muchnik", title = "Ortholog Clustering on a Multipartite Graph", journal = j-TCBB, volume = "4", number = "1", pages = "17--27", month = jan, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:57:20 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We present a method for automatically extracting groups of orthologous genes from a large set of genomes by a new clustering algorithm on a weighted multipartite graph. The method assigns a score to an arbitrary subset of genes from multiple genomes to assess the orthologous relationships between genes in the subset. This score is computed using sequence similarities between the member genes and the phylogenetic relationship between the corresponding genomes. An ortholog cluster is found as the subset with the highest score, so ortholog clustering is formulated as a combinatorial optimization problem. The algorithm for finding an ortholog cluster runs in time $ O(|E| + |V| \log |V|) $, where $V$ and $E$ are the sets of vertices and edges, respectively, in the graph. 
However, if we discretize the similarity scores into a constant number of bins, the runtime improves to $ O(|E| + |V|) $. The proposed method was applied to seven complete eukaryote genomes on which the manually curated database of eukaryotic ortholog clusters, KOG, is constructed. A comparison of our results with the manually curated ortholog clusters shows that our clusters are well correlated with the existing clusters.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "biology; clustering algorithms; genetics; Graph-theoretic methods", } @Article{Lasker:2007:EDH, author = "Keren Lasker and Oranit Dror and Maxim Shatsky and Ruth Nussinov and Haim J. Wolfson", title = "{EMatch}: Discovery of High Resolution Structural Homologues of Protein Domains in Intermediate Resolution Cryo-{EM} Maps", journal = j-TCBB, volume = "4", number = "1", pages = "28--39", month = jan, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:57:20 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Cryo-EM has become an increasingly powerful technique for elucidating the structure, dynamics, and function of large flexible macromolecule assemblies that cannot be determined at atomic resolution. However, due to the relatively low resolution of cryo-EM data, a major challenge is to identify components of complexes appearing in cryo-EM maps. Here, we describe EMatch, a novel integrated approach for recognizing structural homologues of protein domains present in a 6-10{\AA} resolution cryo-EM map and constructing a quasi-atomic structural model of their assembly. The method is highly efficient and has been successfully validated on various simulated data. 
The strength of the method is demonstrated by a domain assembly of an experimental cryo-EM map of native GroEL at 6{\AA} resolution.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "3D alignment of secondary structures; cyclic symmetry; intermediate resolution cryo-EM maps; macromolecular assemblies; structural bioinformatics", } @Article{Wang:2007:ACC, author = "Lipo Wang and Feng Chu and Wei Xie", title = "Accurate Cancer Classification Using Expressions of Very Few Genes", journal = j-TCBB, volume = "4", number = "1", pages = "40--53", month = jan, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:57:20 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We aim at finding the smallest set of genes that can ensure highly accurate classification of cancers from microarray data by using supervised machine learning algorithms. The significance of finding the minimum gene subsets is three-fold: (1) It greatly reduces the computational burden and `noise' arising from irrelevant genes. In the examples studied in this paper, finding the minimum gene subsets even allows for extraction of simple diagnostic rules which lead to accurate diagnosis without the need for any classifiers. (2) It simplifies gene expression tests to include only a very small number of genes rather than thousands of genes, which can bring down the cost for cancer testing significantly. (3) It calls for further investigation into the possible biological relationship between these small numbers of genes and cancer development and treatment. Our simple yet very effective method involves two steps. In the first step, we choose some important genes using a feature importance ranking scheme. 
In the second step, we test the classification capability of all simple combinations of those important genes by using a good classifier. For three `small' and `simple' data sets with two, three, and four cancer (sub)types, our approach obtained very high accuracy with only two or three genes. For a `large' and `complex' data set with 14 cancer types, we divided the whole problem into a group of binary classification problems and applied the 2-step approach to each of these binary classification problems. Through this `divide-and-conquer' approach, we obtained accuracy comparable to previously reported results but with only 28 genes rather than 16,063 genes. In general, our method can significantly reduce the number of genes required for highly reliable diagnosis.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "cancer classification; fuzzy; gene expression; neural networks; support vector machines", } @Article{Zhi:2007:CBA, author = "Degui Zhi and Uri Keich and Pavel Pevzner and Steffen Heber and Haixu Tang", title = "Correcting Base-Assignment Errors in Repeat Regions of Shotgun Assembly", journal = j-TCBB, volume = "4", number = "1", pages = "54--64", month = jan, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:57:20 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Accurate base-assignment in repeat regions of a whole genome shotgun assembly is an unsolved problem. Since reads in repeat regions cannot be easily attributed to a unique location in the genome, current assemblers may place these reads arbitrarily. As a result, the base-assignment error rate in repeats is likely to be much higher than that in the rest of the genome. 
We developed an iterative algorithm, EULER-AIR, that is able to correct base-assignment errors in finished genome sequences in public databases. The Wolbachia genome is among the best finished genomes. Using this genome project as an example, we demonstrated that EULER-AIR can (1) discover and correct base-assignment errors, (2) provide accurate read assignments, (3) utilize finishing reads for accurate base-assignment, and (4) provide guidance for designing finishing experiments. In the genome of Wolbachia, EULER-AIR found 16 positions with ambiguous base-assignment and two positions with erroneous bases. Besides Wolbachia, many other genome sequencing projects have significantly fewer finishing reads and, hence, are likely to contain more base-assignment errors in repeats. We demonstrate that EULER-AIR is a software tool that can be used to find and correct base-assignment errors in a genome assembly project.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "expectation maximization; finishing; fragment assembly", } @Article{Xu:2007:MCC, author = "Rui Xu and Georgios C. Anagnostopoulos and Donald C. Wunsch", title = "Multiclass Cancer Classification Using Semisupervised Ellipsoid {ARTMAP} and Particle Swarm Optimization with Gene Expression Data", journal = j-TCBB, volume = "4", number = "1", pages = "65--77", month = jan, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:57:20 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "It is crucial for cancer diagnosis and treatment to accurately identify the site of origin of a tumor. 
With the emergence and rapid advancement of DNA microarray technologies, constructing gene expression profiles for different cancer types has already become a promising means for cancer classification. In addition to research on binary classification such as normal versus tumor samples, which attracts numerous efforts from a variety of disciplines, the discrimination of multiple tumor types is also important. Meanwhile, the selection of genes which are relevant to a certain cancer type not only improves the performance of the classifiers, but also provides molecular insights for treatment and drug development. Here, we use Semisupervised Ellipsoid ARTMAP (ssEAM) for multiclass cancer discrimination and particle swarm optimization for informative gene selection. ssEAM is a neural network architecture rooted in Adaptive Resonance Theory and suitable for classification tasks. ssEAM features fast, stable, and finite learning and creates hyperellipsoidal clusters, inducing complex nonlinear decision boundaries. PSO is an evolutionary algorithm-based technique for global optimization. A discrete binary version of PSO is employed to indicate whether genes are chosen or not. The effectiveness of ssEAM\slash PSO for multiclass cancer diagnosis is demonstrated by testing it on three publicly available multiple-class cancer data sets. ssEAM\slash PSO achieves competitive performance on all these data sets, with results comparable to or better than those obtained by other classifiers.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "cancer classification; gene expression profile; particle swarm optimization; semisupervised ellipsoid ARTMAP", } @Article{Huang:2007:PPP, author = "Chengbang Huang and Faruck Morcos and Simon P. Kanaan and Stefan Wuchty and Danny Z. Chen and Jesus A. 
Izaguirre", title = "Predicting Protein-Protein Interactions from Protein Domains Using a Set Cover Approach", journal = j-TCBB, volume = "4", number = "1", pages = "78--87", month = jan, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:57:20 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "One goal of contemporary proteome research is the elucidation of cellular protein interactions. Based on currently available protein-protein interaction and domain data, we introduce a novel method, Maximum Specificity Set Cover (MSSC), for the prediction of protein-protein interactions. In our approach, we map the relationship between interactions of proteins and their corresponding domain architectures to a generalized weighted set cover problem. The application of a greedy algorithm provides sets of domain interactions which explain the presence of protein interactions to the largest degree of specificity. Utilizing domain and protein interaction data of \bioname{S. cerevisiae}, MSSC enables prediction of previously unknown protein interactions, links that are well supported by a high tendency of coexpression and functional homogeneity of the corresponding proteins. Focusing on concrete examples, we show that MSSC reliably predicts protein interactions in well-studied molecular systems, such as the 26S proteasome and RNA polymerase II of \bioname{S. cerevisiae}. We also show that the quality of the predictions is comparable to the Maximum Likelihood Estimation while MSSC is faster. 
This new algorithm and all data sets used are accessible through a Web portal at \path=http://ppi.cse.nd.edu=.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "bioinformatics (genome or protein) databases; biology; Computations on discrete structures; genetics; graph algorithms", } @Article{Kim:2007:AAD, author = "Jong Hyun Kim and Michael S. Waterman and Lei M. Li", title = "Accuracy Assessment of Diploid Consensus Sequences", journal = j-TCBB, volume = "4", number = "1", pages = "88--97", month = jan, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:57:20 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "If the origins of fragments are known in genome sequencing projects, it is straightforward to reconstruct diploid consensus sequences. In reality, however, this is not true. Although there are proposed methods to reconstruct haplotypes from genome sequencing projects, an accuracy assessment is required to evaluate the confidence of the estimated diploid consensus sequences. In this paper, we define the confidence score of diploid consensus sequences. It requires the calculation of the likelihood of an assembly. To calculate the likelihood, we propose a linear time algorithm with respect to the number of polymorphic sites. The likelihood calculation and confidence score are used for further improvements of haplotype estimation in two directions. One direction is that low-scored phases are disconnected. The other direction is that, instead of using nominal frequency 1/2, the haplotype frequency is estimated to reflect the actual contribution of each haplotype. Our method was evaluated on the simulated data whose polymorphism rate (1.2 percent) was based on Ciona intestinalis. 
As a result, the high accuracy of our algorithm was indicated: The true positive rate of the haplotype estimation was greater than 97 percent.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "diploid; haplotype; polymorphism; shotgun sequencing", } @Article{Alekseyev:2007:CBG, author = "Max A. Alekseyev and Pavel A. Pevzner", title = "Colored {de Bruijn} Graphs and the Genome Halving Problem", journal = j-TCBB, volume = "4", number = "1", pages = "98--107", month = jan, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:57:20 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Breakpoint graph analysis is a key algorithmic technique in studies of genome rearrangements. However, breakpoint graphs are defined only for genomes without duplicated genes, thus limiting their applications in rearrangement analysis. We discuss a connection between the breakpoint graphs and de Bruijn graphs that leads to a generalization of the notion of breakpoint graph for genomes with duplicated genes. We further use the generalized breakpoint graphs to study the Genome Halving Problem (first introduced and solved by Nadia El-Mabrouk and David Sankoff). The El-Mabrouk-Sankoff algorithm is rather complex, and, in this paper, we present an alternative approach that is based on generalized breakpoint graphs. 
The generalized breakpoint graphs make the El-Mabrouk-Sankoff result more transparent and promise to be useful in future studies of genome rearrangements.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "breakpoint graph; de Bruijn graph; genome duplication; genome halving; genome rearrangement; reversal", } @Article{Mossel:2007:DMT, author = "Elchanan Mossel", title = "Distorted Metrics on Trees and Phylogenetic Forests", journal = j-TCBB, volume = "4", number = "1", pages = "108--116", month = jan, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:57:20 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We study distorted metrics on binary trees in the context of phylogenetic reconstruction. Given a binary tree $T$ on $n$ leaves with a path metric $d$, consider the pairwise distances $ d(u, v) $ between leaves. It is well known that these determine the tree and the $d$-length of all edges. Here, we consider distortions $ \hat {d} $ of $d$ such that, for all leaves $u$ and $v$, it holds that $ |d(u, v) - \hat {d}(u, v)| < f / 2 $ if either $ d(u, v) < M + f / 2 $ or $ \hat {d}(u, v) < M + f / 2 $, where $d$ satisfies $ f \leq d(e) \leq g $ for all edges $e$. Given such distortions, we show how to reconstruct in polynomial time a forest $ T_1, \ldots {}, T_\alpha $ such that the true tree $T$ may be obtained from that forest by adding $ \alpha - 1 $ edges and $ \alpha - 1 \leq 2^{- \Omega (M / g)} n $. Our distorted metric result implies a reconstruction algorithm of phylogenetic forests with a small number of trees from sequences of length logarithmic in the number of species. The reconstruction algorithm is applicable for the general Markov model. 
Both the distorted metric result and its applications to phylogeny are almost tight.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "CFN; distortion; forest; Jukes--Cantor; metric; phylogenetics; tree", } @Article{Aeling:2007:DDE, author = "Kimberly A. Aeling and Nicholas R. Steffen and Matthew Johnson and G. Wesley Hatfield and Richard H. Lathrop and Donald F. Senear", title = "{DNA} Deformation Energy as an Indirect Recognition Mechanism in Protein-{DNA} Interactions", journal = j-TCBB, volume = "4", number = "1", pages = "117--125", month = jan, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:57:20 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Proteins that bind to specific locations in genomic DNA control many basic cellular functions. Proteins detect their binding sites using both direct and indirect recognition mechanisms. Deformation energy, which models the energy required to bend DNA from its native shape to its shape when bound to a protein, has been shown to be an indirect recognition mechanism for one particular protein, Integration Host Factor (IHF). This work extends the analysis of deformation to two other DNA-binding proteins, CRP and SRF, and two endonucleases, I-CreI and I-PpoI. Known binding sites for all five proteins showed statistically significant differences in mean deformation energy as compared to random sequences. Binding sites for the three DNA-binding proteins and one of the endonucleases had mean deformation energies lower than random sequences. Binding sites for I-PpoI had mean deformation energy higher than random sequences. 
Classifiers that were trained using the deformation energy at each base pair step showed good cross-validated accuracy when classifying unseen sequences as binders or nonbinders. These results support DNA deformation energy as an indirect recognition mechanism across a wider range of DNA-binding proteins. Deformation energy may also have a predictive capacity for the underlying catalytic mechanism of DNA-binding enzymes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "deformation energy; DNA bending; DNA-protein binding; indirect readout; indirect recognition; perceptron learning", } @Article{Yang:2007:MFE, author = "Jing Yang and Sarawan Wongsa and Visakan Kadirkamanathan and Stephen A. Billings and Phillip C. Wright", title = "Metabolic Flux Estimation --- a Self-Adaptive Evolutionary Algorithm with Singular Value Decomposition", journal = j-TCBB, volume = "4", number = "1", pages = "126--138", month = jan, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:57:20 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Metabolic flux analysis is important for metabolic system regulation and intracellular pathway identification. A popular approach for intracellular flux estimation involves using $^{13}{\rm C}$ tracer experiments to label states that can be measured by nuclear magnetic resonance spectrometry or gas chromatography mass spectrometry. However, the bilinear balance equations derived from $^{13}{\rm C}$ tracer experiments and the noisy measurements require a nonlinear optimization approach to obtain the optimal solution. 
In this paper, the flux quantification problem is formulated as an error-minimization problem with equality and inequality constraints through the $^{13}{\rm C}$ balance and stoichiometric equations. The stoichiometric constraints are transformed to a null space by singular value decomposition. Self-adaptive evolutionary algorithms are then introduced for flux quantification. The performance of the evolutionary algorithm is compared with ordinary least squares estimation by the simulation of the central pentose phosphate pathway. The proposed algorithm is also applied to the central metabolism of Corynebacterium glutamicum under lysine-producing conditions. A comparison between the results from the proposed algorithm and data from the literature is given. The complexity of a metabolic system with bidirectional reactions is also investigated by analyzing the fluctuations in the flux estimates when available measurements are varied.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "evolutionary computing; least squares method; metabolic flux analysis; singular value decomposition.", } @Article{Wu:2007:QBP, author = "Gang Wu and Jia-Huai You and Guohui Lin", title = "Quartet-Based Phylogeny Reconstruction with Answer Set Programming", journal = j-TCBB, volume = "4", number = "1", pages = "139--152", month = jan, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:57:20 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In this paper, a new representation is presented for the Maximum Quartet Consistency (MQC) problem, where solving the MQC problem becomes searching for an ultrametric matrix that satisfies a maximum number of given quartet topologies. 
A number of structural properties of the MQC problem in this new representation are characterized through formulating into answer set programming, a recent powerful logic programming tool for modeling and solving search problems. Using these properties, a number of optimization techniques are proposed to speed up the search process. The experimental results on a number of simulated data sets suggest that the new representation, combined with answer set programming, presents a unique perspective to the MQC problem.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "Answer Set Programming (ASP); Maximum Quartet Consistency (MQC); phylogeny; quartet; ultrametric matrix.", } @Article{Reinert:2007:LLE, author = "Gesine Reinert and Michael S. Waterman", title = "On the Length of the Longest Exact Position Match in a Random Sequence", journal = j-TCBB, volume = "4", number = "1", pages = "153--156", month = jan, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:57:20 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A mixed Poisson approximation and a Poisson approximation for the length of the longest exact match of a random sequence across another sequence are provided, where the match is required to start at position 1 in the first sequence. This problem arises when looking for suitable anchors in whole genome alignments.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "Chen-Stein method; length of longest match; mixed Poisson approximation; Poisson approximation", } @Article{Au:2007:CAC, author = "Wai-Ho Au and Keith C. C. Chan and Andrew K. C. 
Wong and Yang Wang", title = "Correction to {``Attribute Clustering for Grouping, Selection, and Classification of Gene Expression Data''}", journal = j-TCBB, volume = "4", number = "1", pages = "157--157", month = jan, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:57:20 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This is a correction to a typographical error in (11) in [1] which presents the calculation of the sum of the multiple significant interdependence redundancy measure. Equation (11) in [1] should be: $$ k = \arg \max \nolimits_{k \in \{ 2, \ldots, p \} } \sum_{r = 1}^k \sum_{A_i \in \{ C_r - \eta_r \} }R(A_i \colon \eta_r). \eqno (11) $$ We remark that the experimental results reported in [1] are based on (11) above, not (11) in [1].", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Staff:2007:RL, author = "{IEEE and ACM Transactions on Computational Biology and Bioinformatics staff}", title = "2006 Reviewers List", journal = j-TCBB, volume = "4", number = "1", pages = "158--160", month = jan, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:57:20 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Rajapakse:2007:GEI, author = "Jagath C. Rajapakse and Yan-Qing Zhang and Gary B. 
Fogel", title = "{Guest Editors}' Introduction to the {Special Section: Computational Intelligence Approaches in Computational Biology and Bioinformatics}", journal = j-TCBB, volume = "4", number = "2", pages = "161--162", month = apr, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:57:55 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wang:2007:PBS, author = "Haiying Wang and Huiru Zheng and Francisco Azuaje", title = "{Poisson}-Based Self-Organizing Feature Maps and Hierarchical Clustering for Serial Analysis of Gene Expression Data", journal = j-TCBB, volume = "4", number = "2", pages = "163--175", month = apr, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:57:55 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Serial analysis of gene expression (SAGE) is a powerful technique for global gene expression profiling, allowing simultaneous analysis of thousands of transcripts without prior structural and functional knowledge. Pattern discovery and visualization have become fundamental approaches to analyzing such large-scale gene expression data. From the pattern discovery perspective, clustering techniques have received great attention. However, due to the statistical nature of SAGE data (i.e., underlying distribution), traditional clustering techniques may not be suitable for SAGE data analysis. Based on the adaptation and improvement of Self-Organizing Maps and hierarchical clustering techniques, this paper presents two new clustering algorithms, namely, PoissonS and PoissonHC, for SAGE data analysis. 
Tested on synthetic and experimental SAGE data, these algorithms demonstrate several advantages over traditional pattern discovery techniques. The results indicate that, by incorporating statistical properties of SAGE data, PoissonS and PoissonHC, as well as a hybrid approach (neuro-hierarchical approach) based on the combination of PoissonS and PoissonHC, offer significant improvements in pattern discovery and visualization for SAGE data. Moreover, a user-friendly platform, which may improve and accelerate SAGE data mining, was implemented. The system is freely available on request from the authors for nonprofit use.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "hybrid machine learning; Pattern discovery and visualization; Poisson distribution; self-organizing maps; serial analysis of gene expression.", } @Article{Sjahputera:2007:RAC, author = "Ozy Sjahputera and James M. Keller and J. Wade Davis and Kristen H. Taylor and Farahnaz Rahmatpanah and Huidong Shi and Derek T. Anderson and Samuel N. Blisard and Robert H. Luke and Mihail Popescu and Gerald C. Arthur and Charles W. Caldwell", title = "Relational Analysis of {CpG} Islands Methylation and Gene Expression in Human Lymphomas Using Possibilistic {C}-Means Clustering and Modified Cluster Fuzzy Density", journal = j-TCBB, volume = "4", number = "2", pages = "176--189", month = apr, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:57:55 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Heterogeneous genetic and epigenetic alterations are commonly found in human non-Hodgkin's lymphomas (NHL). 
One such epigenetic alteration is aberrant methylation of gene promoter-related CpG islands, where hypermethylation frequently results in transcriptional inactivation of target genes, while a decrease or loss of promoter methylation (hypomethylation) is frequently associated with transcriptional activation. Discovering genes with these relationships in NHL or other types of cancers could lead to a better understanding of the pathobiology of these diseases. The simultaneous analysis of promoter methylation using Differential Methylation Hybridization (DMH) and its associated gene expression using Expressed CpG Island Sequence Tag (ECIST) microarrays generates a large volume of methylation-expression relational data. To analyze this data, we propose a set of algorithms based on fuzzy sets theory, in particular Possibilistic c-Means (PCM) and cluster fuzzy density. For each gene, these algorithms calculate measures of confidence of various methylation-expression relationships in each NHL subclass. 
Thus, these tools can be used as a means of high volume data exploration to better guide biological confirmation using independent molecular biology methods.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "cluster density; clustering; expression; fuzzy sets; Methylation; microarray", } @Article{Lu:2007:ISL, author = "Yijuan Lu and Qi Tian and Feng Liu and Maribel Sanchez and Yufeng Wang", title = "Interactive Semisupervised Learning for Microarray Analysis", journal = j-TCBB, volume = "4", number = "2", pages = "190--203", month = apr, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:57:55 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Microarray technology has generated vast amounts of gene expression data with distinct patterns. Based on the premise that genes of correlated functions tend to exhibit similar expression patterns, various machine learning methods have been applied to capture these specific patterns in microarray data. However, the discrepancy between the rich expression profiles and the limited knowledge of gene functions has been a major hurdle to the understanding of cellular networks. To bridge this gap so as to properly comprehend and interpret expression data, we introduce Relevance Feedback to microarray analysis and propose an interactive learning framework to incorporate the expert knowledge into the decision module. In order to find a good learning method and solve two intrinsic problems in microarray data, high dimensionality and small sample size, we also propose a semisupervised learning algorithm: Kernel Discriminant-EM (KDEM). 
This algorithm efficiently utilizes a large set of unlabeled data to compensate for the insufficiency of a small set of labeled data and it extends the linear algorithm in Discriminant-EM (DEM) to a kernel algorithm to handle nonlinearly separable data in a lower dimensional space. The Relevance Feedback technique and KDEM together construct an efficient and effective interactive semisupervised learning framework for microarray analysis. Extensive experiments on the yeast cell cycle regulation data set and Plasmodium falciparum red blood cell cycle data set show the promise of this approach.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "kernel DEM; microarray analysis; relevance feedback; semisupervised learning", } @Article{Lerner:2007:CSI, author = "Boaz Lerner and Josepha Yeshaya and Lev Koushnir", title = "On the Classification of a Small Imbalanced Cytogenetic Image Database", journal = j-TCBB, volume = "4", number = "2", pages = "204--215", month = apr, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:57:55 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Solving a multiclass classification task using a small imbalanced database of patterns of high dimension is difficult due to the curse-of-dimensionality and the bias of the training toward the majority classes. Such a problem has arisen while diagnosing genetic abnormalities by classifying a small database of fluorescence in situ hybridization signals of types having different frequencies of occurrence. We propose and experimentally study using the cytogenetic domain two solutions to the problem. 
The first is hierarchical decomposition of the classification task, where each hierarchy level is designed to tackle a simpler problem which is represented by classes that are approximately balanced. The second solution is balancing the data by up-sampling the minority classes accompanied by dimensionality reduction. Implemented by the naive Bayesian classifier or the multilayer perceptron neural network, both solutions have diminished the problem and contributed to accuracy improvement. In addition, the experiments suggest that coping with the smallness of the data is more beneficial than dealing with its imbalance.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "classification; dimensionality reduction; genetic diagnosis; imbalanced data; multilayer perceptron (MLP); naive Bayesian classifier (NBC); small sample size.", } @Article{Igel:2007:GBO, author = "Christian Igel and Tobias Glasmachers and Britta Mersch and Nico Pfeifer and Peter Meinicke", title = "Gradient-Based Optimization of Kernel-Target Alignment for Sequence Kernels Applied to Bacterial Gene Start Detection", journal = j-TCBB, volume = "4", number = "2", pages = "216--226", month = apr, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:57:55 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Biological data mining using kernel methods can be improved by a task-specific choice of the kernel function. Oligo kernels for genomic sequence analysis have proven to have a high discriminative power and to provide interpretable results. Oligo kernels that consider subsequences of different lengths can be combined and parameterized to increase their flexibility. 
For adapting these parameters efficiently, gradient-based optimization of the kernel-target alignment is proposed. The power of this new, general model selection procedure and the benefits of fitting kernels to problem classes are demonstrated by adapting oligo kernels for bacterial gene start detection.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "kernel target alignment; model selection; oligo kernel; sequence analysis; support vector machines; translation initiation sites", } @Article{Ogul:2007:SLP, author = "Hasan Ogul and Erkan U. Mumcuo{\u{g}}lu", title = "Subcellular Localization Prediction with New Protein Encoding Schemes", journal = j-TCBB, volume = "4", number = "2", pages = "227--232", month = apr, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:57:55 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Subcellular localization is one of the key properties in functional annotation of proteins. Support vector machines (SVMs) have been widely used for automated prediction of subcellular localizations. Existing methods differ in the protein encoding schemes used. In this study, we present two methods for protein encoding to be used for SVM-based subcellular localization prediction: $n$-peptide compositions with reduced amino acid alphabets for larger values of $n$ and pairwise sequence similarity scores based on whole sequence and N-terminal sequence. We tested the methods on a common benchmarking data set that consists of 2,427 eukaryotic proteins with four localization sites. 
As a result of 5-fold cross-validation tests, the encoding with $n$-peptide compositions provided the accuracies of 84.5, 88.9, 66.3, and 94.3 percent for cytoplasmic, extracellular, mitochondrial, and nuclear proteins, where the overall accuracy was 87.1 percent. The second method provided 83.6, 87.7, 87.9, and 90.5 percent accuracies for individual locations and 87.8 percent overall accuracy. A hybrid system, which we called PredLOC, makes a final decision based on the results of the two presented methods which achieved an overall accuracy of 91.3 percent, which is better than the achievements of many of the existing methods. The new system also outperformed the recent methods in the experiments conducted on a new-unique SWISSPROT test set.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "$n$-peptide composition; probabilistic suffix tree; subcellular localization; support vector machines", } @Article{Li:2007:DSD, author = "Wenyuan Li and Ying Liu and Hung-Chung Huang and Yanxiong Peng and Yongjing Lin and Wee-Keong Ng and Kok-Leong Ong", title = "Dynamical Systems for Discovering Protein Complexes and Functional Modules from Biological Networks", journal = j-TCBB, volume = "4", number = "2", pages = "233--250", month = apr, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:57:55 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Recent advances in high throughput experiments and annotations via published literature have provided a wealth of interaction maps of several biomolecular networks, including metabolic, protein-protein, and protein-DNA interaction networks. 
The architecture of these molecular networks reveals important principles of cellular organization and molecular functions. Analyzing such networks, i.e., discovering dense regions in the network, is an important way to identify protein complexes and functional modules. This task has been formulated as the problem of finding heavy subgraphs, the Heaviest $k$-Subgraph Problem ($k$-HSP), which itself is NP-hard. However, any method based on the $k$-HSP requires the parameter $k$ and an exact solution of $k$-HSP may still end up as a `spurious' heavy subgraph, thus reducing its practicability in analyzing large scale biological networks. We proposed a new formulation, called the rank-HSP, and two dynamical systems to approximate its results. In addition, a novel metric, called the Standard deviation and Mean Ratio (SMR), is proposed for use in `spurious' heavy subgraphs to automate the discovery by setting a fixed threshold. Empirical results on both the simulated graphs and biological networks have demonstrated the efficiency and effectiveness of our proposal.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "bioinformatics databases; evolutionary computing; Graph algorithms; neural nets", } @Article{Hu:2007:DMP, author = "Xiaohua Hu and Daniel D. 
Wu", title = "Data Mining and Predictive Modeling of Biomolecular Network from Biomedical Literature Databases", journal = j-TCBB, volume = "4", number = "2", pages = "251--263", month = apr, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:57:55 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In this paper, we present a novel approach Bio-IEDM (Biomedical Information Extraction and Data Mining) to integrate text mining and predictive modeling to analyze biomolecular network from biomedical literature databases. Our method consists of two phases. In phase 1, we discuss a semisupervised efficient learning approach to automatically extract biological relationships such as protein-protein interaction, protein-gene interaction from the biomedical literature databases to construct the biomolecular network. Our method automatically learns the patterns based on a few user seed tuples and then extracts new tuples from the biomedical literature based on the discovered patterns. The derived biomolecular network forms a large scale-free network graph. In phase 2, we present a novel clustering algorithm to analyze the biomolecular network graph to identify biologically meaningful subnetworks (communities). The clustering algorithm considers the characteristics of the scale-free network graphs and is based on the local density of the vertex and its neighborhood functions that can be used to find more meaningful clusters with different density level. The experimental results indicate our approach is very effective in extracting biological knowledge from a huge collection of biomedical literature. 
The integration of data mining and information extraction provides a promising direction for analyzing the biomolecular network.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "biological complexes (communities); biomolecular network; information extraction; scale-free network; semisupervised learning", } @Article{Neri:2007:AMA, author = "Ferrante Neri and Jari Toivanen and Giuseppe Leonardo Cascella and Yew-Soon Ong", title = "An Adaptive Multimeme Algorithm for Designing {HIV} Multidrug Therapies", journal = j-TCBB, volume = "4", number = "2", pages = "264--278", month = apr, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:57:55 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This paper proposes a period representation for modeling the multidrug HIV therapies and an Adaptive Multimeme Algorithm (AMmA) for designing the optimal therapy. The period representation offers benefits in terms of flexibility and reduction in dimensionality compared to the binary representation. The AMmA is a memetic algorithm which employs a list of three local searchers adaptively activated by an evolutionary framework. These local searchers, having different features according to the exploration logic and the pivot rule, have the role of exploring the decision space from different and complementary perspectives and, thus, assisting the standard evolutionary operators in the optimization process. Furthermore, the AMmA makes use of an adaptation which dynamically sets the algorithmic parameters in order to prevent stagnation and premature convergence. 
The numerical results demonstrate that the application of the proposed algorithm leads to very efficient medication schedules which quickly stimulate a strong immune response to HIV. The earlier termination of the medication schedule leads to lesser unpleasant side effects for the patient due to strong antiretroviral therapy. A numerical comparison shows that the AMmA is more efficient than three popular metaheuristics. Finally, a statistical test based on the calculation of the tolerance interval confirms the superiority of the AMmA compared to the other methods for the problem under study.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "adaptive algorithms; HIV therapy design; memetic algorithms; nonlinear integer programming.", } @Article{Handl:2007:MOB, author = "Julia Handl and Douglas B. Kell and Joshua Knowles", title = "Multiobjective Optimization in Bioinformatics and Computational Biology", journal = j-TCBB, volume = "4", number = "2", pages = "279--292", month = apr, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:57:55 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This paper reviews the application of multiobjective optimization in the fields of bioinformatics and computational biology. A survey of existing work, organized by application area, forms the main body of the review, following an introduction to the key concepts in multiobjective optimization. 
An original contribution of the review is the identification of five distinct `contexts,' giving rise to multiple objectives: These are used to explain the reasons behind the use of multiobjective optimization in each application area and also to point the way to potential future uses of the technique.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "bioinformatics (genome or protein) databases; classification and association rules; clustering; experimental design; global optimization; interactive data exploration and discovery; machine learning", } @Article{Bontempi:2007:BSI, author = "Gianluca Bontempi", title = "A Blocking Strategy to Improve Gene Selection for Classification of Gene Expression Data", journal = j-TCBB, volume = "4", number = "2", pages = "293--300", month = apr, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:57:55 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Because of high dimensionality, machine learning algorithms typically rely on feature selection techniques in order to perform effective classification in microarray gene expression data sets. However, the large number of features compared to the number of samples makes the task of feature selection computationally hard and prone to errors. This paper interprets feature selection as a task of stochastic optimization, where the goal is to select among an exponential number of alternative gene subsets the one expected to return the highest generalization in classification. 
Blocking is an experimental design strategy which produces similar experimental conditions to compare alternative stochastic configurations in order to be confident that observed differences in accuracy are due to actual differences rather than to fluctuations and noise effects. We propose an original blocking strategy for improving feature selection which aggregates in a paired way the validation outcomes of several learning algorithms to assess a gene subset and compare it to others. This is a novelty with respect to conventional wrappers, which commonly adopt a sole learning algorithm to evaluate the relevance of a given set of variables. The rationale of the approach is that, by increasing the amount of experimental conditions under which we validate a feature subset, we can lessen the problems related to the scarcity of samples and consequently come up with a better selection. The paper shows that the blocking strategy significantly improves the performance of a conventional forward selection for a set of 16 publicly available cancer expression data sets. The experiments involve six different classifiers and show that improvements take place independent of the classification algorithm used after the selection step. Two further validations based on available biological annotation support the claim that blocking strategies in feature selection may improve the accuracy and the quality of the solution. The first validation is based on retrieving PubMed abstracts associated to the selected genes and matching them to regular expressions describing the biological phenomenon underlying the expression data sets. 
The biological validation that follows is based on the use of the Bioconductor package GoStats in order to perform Gene Ontology statistical analysis.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "bioinformatics (genome or protein) databases; data mining; feature evaluation and selection; machine learning", } @Article{Diekmann:2007:EUR, author = "Yoan Diekmann and Marie-France Sagot and Eric Tannier", title = "Evolution under Reversals: Parsimony and Conservation of Common Intervals", journal = j-TCBB, volume = "4", number = "2", pages = "301--309", month = apr, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:57:55 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In comparative genomics, gene order data is often modeled as signed permutations. A classical problem for genome comparison is to detect common intervals in permutations, that is, genes that are colocalized in several species, indicating that they remained grouped during evolution. A second largely studied problem related to gene order is to compute a minimum scenario of reversals that transforms a signed permutation into another. Several studies began to mix the two problems and it was observed that their results are not always compatible: Often, parsimonious scenarios of reversals break common intervals. If a scenario does not break any common interval, it is called perfect. In two recent studies, B{\'e}rard et al. defined a class of permutations for which building a perfect scenario of reversals sorting a permutation was achieved in polynomial time and stated as an open question whether it is possible to decide, given a permutation, if there exists a minimum scenario of reversals that is perfect. 
In this paper, we give a solution to this problem and prove that this widens the class of permutations addressed by the aforementioned studies. We implemented and tested this algorithm on gene order data of chromosomes from several mammal species and we compared it to other methods. The algorithm helps to choose among several possible scenarios of reversals and indicates that the minimum scenario of reversals is not always the most plausible.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "common intervals; computational biology; genome rearrangements; perfect sorting; signed permutations; sorting by reversals", } @Article{Weskamp:2007:MGA, author = "Nils Weskamp and Eyke H{\"u}llermeier and Daniel Kuhn and Gerhard Klebe", title = "Multiple Graph Alignment for the Structural Analysis of Protein Active Sites", journal = j-TCBB, volume = "4", number = "2", pages = "310--320", month = apr, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:57:55 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Graphs are frequently used to describe the geometry and also the physicochemical composition of protein active sites. Here, the concept of graph alignment as a novel method for the structural analysis of protein binding pockets is presented. Using inexact graph-matching techniques, one is able to identify both conserved areas and regions of difference among different binding pockets. Thus, using multiple graph alignments, it is possible to characterize functional protein families and to examine differences among related protein families independent of sequence or fold homology. 
Optimized algorithms are described for the efficient calculation of multiple graph alignments for the analysis of physicochemical descriptors representing protein binding pockets. Additionally, it is shown how the calculated graph alignments can be analyzed to identify structural features that are characteristic for a given protein family and also features that are discriminative among related families. The methods are applied to a substantial high-quality subset of the PDB database and their ability to successfully characterize and classify 10 highly populated functional protein families is shown. Additionally, two related protein families from the group of serine proteases are examined and important structural differences are detected automatically and efficiently.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "drug design; fuzzy patterns; graph mining; knowledge discovery in databases; structural pattern discovery", } @Article{Gusfield:2007:AEAb, author = "Dan Gusfield", title = "{Associate Editor} Appreciation and Welcome", journal = j-TCBB, volume = "4", number = "3", pages = "321--321", month = jul, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:58:24 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Fujarewicz:2007:ASM, author = "Krzysztof Fujarewicz and Marek Kimmel and Tomasz Lipniacki and Andrzej Swierniak", title = "Adjoint Systems for Models of Cell Signaling Pathways and their Application to Parameter Fitting", journal = j-TCBB, volume = "4", number = "3", pages = "322--335", month = jul, year = "2007", CODEN = 
"ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:58:24 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The paper concerns the problem of fitting mathematical models of cell signaling pathways. Such models frequently take the form of sets of nonlinear ordinary differential equations. While the model is continuous in time, the performance index used in the fitting procedure involves measurements taken at discrete time moments. Adjoint sensitivity analysis is a tool, which can be used for finding the gradient of a performance index in the space of parameters of the model. In the paper a structural formulation of adjoint sensitivity analysis called the Generalized Backpropagation Through Time (GBPTT) is used. The method is especially suited for hybrid, continuous-discrete time systems. As an example we use the mathematical model of the NF-$\kappa$B regulatory module, which plays a major role in the innate immune response in animals.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "biology and genetics; modeling; ordinary differential equations; parameter learning", } @Article{Wan:2007:CCN, author = "Xiang Wan and Guohui Lin", title = "{CISA}: Combined {NMR} Resonance Connectivity Information Determination and Sequential Assignment", journal = j-TCBB, volume = "4", number = "3", pages = "336--348", month = jul, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:58:24 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A nearly complete sequential resonance assignment is a key factor leading to successful protein structure determination via NMR spectroscopy. 
Assuming the availability of a set of NMR spectral peak lists, most of the existing assignment algorithms first use the differences between chemical shift values for common nuclei across multiple spectra to provide the evidence that some pairs of peaks should be assigned to sequentially adjacent amino acid residues in the target protein. They then use these connectivities as constraints to produce a sequential assignment. At various levels of success, these algorithms typically generate a large number of potential connectivity constraints, and it grows exponentially as the quality of spectral data decreases. A key observation used in our sequential assignment program, CISA, is that chemical shift residual signature information can be used to improve the connectivity determination, and thus to dramatically decrease the number of predicted connectivity constraints. Fewer connectivity constraints lead to less ambiguities in the sequential assignment. Extensive simulation studies on several large test datasets demonstrated that CISA is efficient and effective, compared to three most recently proposed sequential resonance assignment programs RANDOM, PACES, and MARS.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "NMR sequential resonance assignment; spin system; spin system assignment; spin system residual signature; spin system sequential connectivity", } @Article{Cameron:2007:CCS, author = "Michael Cameron and Hugh Williams", title = "Comparing Compressed Sequences for Faster Nucleotide {BLAST} Searches", journal = j-TCBB, volume = "4", number = "3", pages = "349--364", month = jul, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:58:24 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Molecular 
biologists, geneticists, and other life scientists use the BLAST homology search package as their first step for discovery of information about unknown or poorly annotated genomic sequences. There are two main variants of BLAST: BLASTP for searching protein collections and BLASTN for nucleotide collections. Surprisingly, BLASTN has had very little attention; for example, the algorithms it uses do not follow those described in the 1997 BLAST paper [1] and no exact description has been published. It is important that BLASTN is state-of-the-art: Nucleotide collections such as GenBank dwarf the protein collections in size, they double in size almost yearly, and they take many minutes to search on modern general purpose workstations. This paper proposes significant improvements to the BLASTN algorithms. Each of our schemes is based on compressed bytepacked formats that allow queries and collection sequences to be compared four bases at a time, permitting very fast query evaluation using lookup tables and numeric comparisons. Our most significant innovations are two new, fast gapped alignment schemes that allow accurate sequence alignment without decompression of the collection sequences. 
Overall, our innovations more than double the speed of BLASTN with no effect on accuracy and have been integrated into our new version of BLAST that is freely available for download from \path=http://www.fsa-blast.org/=.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "BLAST; compression; Four Russians algorithm; homology search; sequence alignment", } @Article{Tang:2007:DTS, author = "Yuchun Tang and Yan-Qing Zhang and Zhen Huang", title = "Development of Two-Stage {SVM}-{RFE} Gene Selection Strategy for Microarray Expression Data Analysis", journal = j-TCBB, volume = "4", number = "3", pages = "365--381", month = jul, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:58:24 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Extracting a subset of informative genes from microarray expression data is a critical data preparation step in cancer classification and other biological function analyses. Though many algorithms have been developed, the Support Vector Machine - Recursive Feature Elimination (SVM-RFE) algorithm is one of the best gene feature selection algorithms. It assumes that a smaller `filter-out' factor in the SVM-RFE, which results in a smaller number of gene features eliminated in each recursion, should lead to extraction of a better gene subset. Because the SVM-RFE is highly sensitive to the `filter-out' factor, our simulations have shown that this assumption is not always correct and that the SVM-RFE is an unstable algorithm. To select a set of key gene features for reliable prediction of cancer types or subtypes and other applications, a new two-stage SVM-RFE algorithm has been developed. 
It is designed to effectively eliminate most of the irrelevant, redundant and noisy genes while keeping information loss small at the first stage. A fine selection for the final gene subset is then performed at the second stage. The two-stage SVM-RFE overcomes the instability problem of the SVM-RFE to achieve better algorithm utility. We have demonstrated that the two-stage SVM-RFE is significantly more accurate and more reliable than the SVM-RFE and three correlation-based methods based on our analysis of three publicly available microarray expression datasets. Furthermore, the two-stage SVM-RFE is computationally efficient because its time complexity is $ O(d * \log_2 d) $, where $d$ is the size of the original gene set.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "bioinformatics; cancer classification; feature selection; gene selection; microarray gene expression data analysis; recursive feature elimination; support vector machines", } @Article{Ng:2007:NGW, author = "Lydia Ng and Sayan Pathak and Chihchau Kuan and Chris Lau and Hong-wei Dong and Andrew Sodt and Chinh Dang and Brian Avants and Paul Yushkevich and James Gee and David Haynor and Ed Lein and Allan Jones and Mike Hawrylycz", title = "Neuroinformatics for Genome-Wide {$3$-D} Gene Expression Mapping in the Mouse Brain", journal = j-TCBB, volume = "4", number = "3", pages = "382--393", month = jul, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:58:24 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Large scale gene expression studies in the mammalian brain offer the promise of understanding the topology, networks and ultimately the function of its complex anatomy, opening previously unexplored avenues in neuroscience. 
High-throughput methods permit genome-wide searches to discover genes that are uniquely expressed in brain circuits and regions that control behavior. Previous gene expression mapping studies in model organisms have employed in situ hybridization (ISH), a technique that uses labeled nucleic acid probes to bind to specific mRNA transcripts in tissue sections. A key requirement for this effort is the development of fast and robust algorithms for anatomically mapping and quantifying gene expression for ISH. We describe a neuroinformatics pipeline for automatically mapping expression profiles of ISH data and its use to produce the first genomic scale 3-D mapping of gene expression in a mammalian brain. The pipeline is fully automated and adaptable to other organisms and tissues. Our automated study of over 20,000 genes indicates that at least 78.8\% are expressed at some level in the adult C56BL/6J mouse brain. In addition to providing a platform for genomic scale search, high-resolution images and visualization tools for expression analysis are available at the Allen Brain Atlas web site (http://www.brain-map.org).", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "bioinformatics (genome or protein) databases; data mining; information visualization; registration; segmentation", } @Article{Nguyen:2007:RRN, author = "C. 
Thach Nguyen and Nguyen Bao Nguyen and Wing-Kin Sung and Louxin Zhang", title = "Reconstructing Recombination Network from Sequence Data: The Small Parsimony Problem", journal = j-TCBB, volume = "4", number = "3", pages = "394--402", month = jul, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:58:24 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The small parsimony problem is studied for reconstructing recombination networks from sequence data. The small parsimony problem is polynomial-time solvable for phylogenetic trees. However, the problem is proved NP-hard even for galled recombination networks. A dynamic programming algorithm is also developed to solve the small parsimony problem. It takes $ O(d n2^{3h}) $ time on an input recombination network over length-$d$ sequences in which there are $h$ recombination and $ n - h $ tree nodes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "approximability; combination network; dynamic programming; NP-hardness; parsimony method; phylogenetic network", } @Article{Lones:2007:RMD, author = "Michael Lones and Andy Tyrrell", title = "Regulatory Motif Discovery Using a Population Clustering Evolutionary Algorithm", journal = j-TCBB, volume = "4", number = "3", pages = "403--414", month = jul, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:58:24 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This paper describes a novel evolutionary algorithm for regulatory motif discovery in DNA promoter sequences. The algorithm uses data clustering to logically distribute the evolving population across the search space. 
Mating then takes place within local regions of the population, promoting overall solution diversity and encouraging discovery of multiple solutions. Experiments using synthetic data sets have demonstrated the algorithm's capacity to find position frequency matrix models of known regulatory motifs in relatively long promoter sequences. These experiments have also shown the algorithm's ability to maintain diversity during search and discover multiple motifs within a single population. The utility of the algorithm for discovering motifs in real biological data is demonstrated by its ability to find meaningful motifs within muscle-specific regulatory sequences.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "evolutionary computation; motif discovery; muscle-specific gene expression; population-based data clustering; transcription factor binding sites", } @Article{Yip:2007:SIS, author = "Andy M. Yip and Michael K. Ng and Edmond H. Wu and Tony F. Chan", title = "Strategies for Identifying Statistically Significant Dense Regions in Microarray Data", journal = j-TCBB, volume = "4", number = "3", pages = "415--429", month = jul, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:58:24 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We propose and study the notion of dense regions for the analysis of categorized gene expression data and present some searching algorithms for discovering them. The algorithms can be applied to any categorical data matrices derived from gene expression level matrices. 
We demonstrate that dense regions are simple but useful and statistically significant patterns that can be used to (1) identify genes and/or samples of interest and (2) eliminate genes and/or samples corresponding to outliers, noise, or abnormalities. Some theoretical studies on the properties of the dense regions are presented which allow us to characterize dense regions into several classes and to derive tailor-made algorithms for different classes of regions. Moreover, an empirical simulation study on the distribution of the size of dense regions is carried out which is then used to assess the significance of dense regions and to derive effective pruning methods to speed up the searching algorithms. Real microarray data sets are employed to test our methods. Comparisons with six other well-known clustering algorithms using synthetic and real data are also conducted which confirm the superiority of our methods in discovering dense regions. The DRIFT code and a tutorial are available as supplemental material, which can be found on the Computer Society Digital Library at \path=http://computer.org/tcbb/archives.htm=.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "bicluster; categorical data; clustering; coexpressed genes; dense region; gene expression; microarray", } @Article{Liang:2007:BBD, author = "Kuo-ching Liang and Xiaodong Wang and Dimitris Anastassiou", title = "{Bayesian} Basecalling for {DNA} Sequence Analysis Using Hidden {Markov} Models", journal = j-TCBB, volume = "4", number = "3", pages = "430--440", month = jul, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:58:24 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "It has been shown that electropherograms of DNA sequences can 
be modeled with hidden Markov models. Basecalling, the procedure that determines the sequence of bases from the given electropherogram, can then be performed using the Viterbi algorithm. A training step is required prior to basecalling in order to estimate the HMM parameters. In this paper, we propose a Bayesian approach which employs the Markov chain Monte Carlo (MCMC) method to perform basecalling. Such an approach not only allows one to naturally encode the prior biological knowledge into the basecalling algorithm, it also exploits both the training data and the basecalling data in estimating the HMM parameters, leading to more accurate estimates. Using the recently sequenced genome of the organism Legionella pneumophila we show that the MCMC basecaller outperforms the state-of-the-art basecalling algorithm in terms of total errors while requiring much less training than other proposed statistical basecallers.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "basecalling; DNA sequencing; electropherogram; hidden Markov model (HMM); Markov chain Monte Carlo (MCMC)", } @Article{Thireou:2007:BLS, author = "Trias Thireou and Martin Reczko", title = "Bidirectional Long Short-Term Memory Networks for Predicting the Subcellular Localization of Eukaryotic Proteins", journal = j-TCBB, volume = "4", number = "3", pages = "441--446", month = jul, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:58:24 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "An algorithm called Bidirectional Long Short-Term Memory Networks (BLSTM) for processing sequential data is introduced. 
This supervised learning method trains a special recurrent neural network to use very long ranged symmetric sequence context using a combination of nonlinear processing elements and linear feedback loops for storing long-range context. The algorithm is applied to the sequence-based prediction of protein localization and predicts 93.3\% novel non-plant proteins and 88.4\% novel plant proteins correctly, which is an improvement over feedforward and standard recurrent networks solving the same problem. The BLSTM system is available as a web-service (\path=http://www.stepc.gr/~synaptic/blstm.html=).", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "biological sequence analysis; long short-term memory; protein subcellular localization prediction; recurrent neural networks", } @Article{Korodi:2007:CAN, author = "Gergely Korodi and Ioan Tabus", title = "Compression of Annotated Nucleotide Sequences", journal = j-TCBB, volume = "4", number = "3", pages = "447--457", month = jul, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:58:24 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This article introduces an algorithm for the lossless compression of DNA files, which contain annotation text besides the nucleotide sequence. First a grammar is specifically designed to capture the regularities of the annotation text. A revertible transformation uses the grammar rules in order to equivalently represent the original file as a collection of parsed segments and a sequence of decisions made by the grammar parser. This decomposition enables the efficient use of state-of-the-art encoders for processing the parsed segments. 
The output size of the decision-making process of the grammar is optimized by extending the states to account for high-order Markovian dependencies. The practical implementation of the algorithm achieves a significant improvement when compared to the general-purpose methods currently used for DNA files.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "E.4 [Data]: Coding and Information Theory | Data compaction and compression; Annotation; Compression; F.4 [Theory of Computation]: Mathematical Logic and Formal Languages | Formal languages; Formal Grammars; G.3 [Mathematics of Computing]: Probability and Statistics | Markov processes; J.3 [Computer Applications]: Life and Medical Sciences | Biology and genetics; nucleotide sequences", } @Article{Bordewich:2007:CHN, author = "Magnus Bordewich and Charles Semple", title = "Computing the Hybridization Number of Two Phylogenetic Trees Is Fixed-Parameter Tractable", journal = j-TCBB, volume = "4", number = "3", pages = "458--466", month = jul, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:58:24 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Reticulation processes in evolution mean that the ancestral history of certain groups of present-day species is non-tree-like. These processes include hybridization, lateral gene transfer, and recombination. 
Despite the existence of reticulation, such events are relatively rare and so a fundamental problem for biologists is the following: given a collection of rooted binary phylogenetic trees on sets of species that correctly represent the tree-like evolution of different parts of their genomes, what is the smallest number of `reticulation' vertices in any network that explains the evolution of the species under consideration. It has been previously shown that this problem is NP-hard even when the collection consists of only two rooted binary phylogenetic trees. However, in this paper, we show that the problem is fixed-parameter tractable in the two-tree instance, when parameterized by this smallest number of reticulation vertices.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "agreement forest; hybridization network; reticulate evolution; rooted phylogenetic tree; subtree prune and regraft", } @Article{Huang:2007:EGS, author = "D. Huang and Tommy Chow", title = "Effective Gene Selection Method With Small Sample Sets Using Gradient-Based and Point Injection Techniques", journal = j-TCBB, volume = "4", number = "3", pages = "467--475", month = jul, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:58:24 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Microarray gene expression data usually consist of a large amount of genes. Among these genes, only a small fraction is informative for performing cancer diagnostic test. This paper focuses on effective identification of informative genes. We analyze gene selection models from the perspective of optimization theory. As a result, a new strategy is designed to modify conventional search engines. 
Also, as overfitting is likely to occur in microarray data because of their small sample set, a point injection technique is developed to address the problem of overfitting. The proposed strategies have been evaluated on three kinds of cancer diagnosis. Our results show that the proposed strategies can improve the performance of gene selection substantially. The experimental results also indicate that the proposed methods are very robust under all the investigated cases.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "gene selection; gradient based learning; optimization theory; point injection", } @Article{Hecht:2007:HTL, author = "David Hecht and Gary Fogel", title = "High-Throughput Ligand Screening via Preclustering and Evolved Neural Networks", journal = j-TCBB, volume = "4", number = "3", pages = "476--484", month = jul, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:58:24 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The pathway for novel lead drug discovery has many major deficiencies, the most significant of which is the immense size of small molecule diversity space. Methods that increase the search efficiency and/or reduce the size of the search space, increase the rate at which useful lead compounds are identified. Artificial neural networks optimized via evolutionary computation provide a cost and time-effective solution to this problem. Here, we present results that suggest preclustering of small molecules prior to neural network optimization is useful for generating models of quantitative structure-activity relationships for a set of HIV inhibitors. 
Using these methods, it is possible to prescreen compounds to separate active from inactive compounds or even actives and mildly active compounds from inactive compounds with high predictive accuracy while simultaneously reducing the feature space. It is also possible to identify `human interpretable' features from the best models that can be used for proposal and synthesis of new compounds in order to optimize potency and specificity.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "artificial neural networks; computational intelligence; evolutionary computation; medicine and science", } @Article{Zhang:2007:MCU, author = "Runxuan Zhang and Guang-Bin Huang and N. Sundararajan and P. Saratchandran", title = "Multicategory Classification Using An Extreme Learning Machine for Microarray Gene Expression Cancer Diagnosis", journal = j-TCBB, volume = "4", number = "3", pages = "485--495", month = jul, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:58:24 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In this paper, the recently developed Extreme Learning Machine (ELM) is used for direct multicategory classification problems in the cancer diagnosis area. ELM avoids problems like local minima, improper learning rate and overfitting commonly faced by iterative learning methods and completes the training very fast. We have evaluated the multi-category classification performance of ELM on three benchmark microarray datasets for cancer diagnosis, namely, the GCM dataset, the Lung dataset and the Lymphoma dataset. 
The results indicate that ELM produces comparable or better classification accuracies with reduced training time and implementation complexity compared to artificial neural networks methods like conventional back-propagation ANN, Linder's SANN, and Support Vector Machine methods like SVM-OVO and Ramaswamy's SVM-OVA. ELM also achieves better accuracies for classification of individual categories.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "extreme learning machine; gene expression; microarray; multi-category classification; SVM", } @Article{Zhang:2007:SSS, author = "Louxin Zhang", title = "Superiority of Spaced Seeds for Homology Search", journal = j-TCBB, volume = "4", number = "3", pages = "496--505", month = jul, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:58:24 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In homology search, good spaced seeds have higher sensitivity for the same cost (weight). However, elucidating the mechanism that confers power to spaced seeds and characterizing optimal spaced seeds still remain unsolved. This paper investigates these two important open questions by formally analyzing the average number of non-overlapping hits and the hit probability of a spaced seed in the Bernoulli sequence model. We prove that when the length of a non-uniformly spaced seed is bounded above by an exponential function of the seed weight, the seed outperforms strictly the traditional consecutive seed of the same weight in both (i) the average number of non-overlapping hits and (ii) the asymptotic hit probability. This clearly answers the first problem mentioned above in the Bernoulli sequence model. 
The theoretical study in this paper also gives a new solution to finding long optimal seeds.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "homology search; pattern matching; renewal theory; run statistics; sequence alignment; spaced seeds", } @Article{Matsen:2007:OCT, author = "Frederick Matsen", title = "Optimization Over a Class of Tree Shape Statistics", journal = j-TCBB, volume = "4", number = "3", pages = "506--512", month = jul, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:58:24 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Tree shape statistics quantify some aspect of the shape of a phylogenetic tree. They are commonly used to compare reconstructed trees to evolutionary models and to find evidence of tree reconstruction bias. Historically, to find a useful tree shape statistic, formulas have been invented by hand and then evaluated for utility. This article presents the first method which is capable of optimizing over a class of tree shape statistics, called Binary Recursive Tree Shape Statistics (BRTSS). After defining the BRTSS class, a set of algebraic expressions is defined which can be used in the recursions. The tree shape statistics definable using these expressions in the BRTSS is very general, and includes many of the statistics with which phylogenetic researchers are already familiar. We then present a practical genetic algorithm which is capable of performing optimization over BRTSS given any objective function. 
The chapter concludes with a successful application of the methods to find a new statistic which indicates a significant difference between two distributions on trees which were previously postulated to have similar properties.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "biology and genetics; evolutionary computing and genetic algorithms", } @Article{Mandoiu:2007:GEI, author = "Ion I. M{\u{a}}ndoiu and Yi Pan and Alexander Zelikovsky", title = "{Guest Editors}' Introduction to the {Special Section on Bioinformatics Research and Applications}", journal = j-TCBB, volume = "4", number = "4", pages = "513--514", month = oct, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:58:47 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zheng:2007:RNA, author = "Chunfang Zheng and Qian Zhu and David Sankoff", title = "Removing Noise and Ambiguities from Comparative Maps in Rearrangement Analysis", journal = j-TCBB, volume = "4", number = "4", pages = "515--522", month = oct, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:58:47 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Comparison of genomic maps is hampered by errors and ambiguities introduced by mapping technology, incorrectly resolved paralogy, small samples of markers and extensive genome rearrangement. 
We design an analysis to remove or resolve most of these problems and to extract corrected data where markers occur in consecutive strips in both genomes. To do this we introduce the notion of pre-strip, an efficient way of generating these, and a compatibility analysis culminating in a Maximum Weighted Clique (MWC) search. The output can be directly analyzed with genome rearrangement algorithms, allowing the restoration of some of the data not incorporated into the clique solution. We investigate the trade-off between criteria for discarding excessive pre-strips to make MWC feasible, in terms of retaining as many markers as possible in the solution and producing an economical rearrangement analysis. We explore these questions through simulation and through comparison of the rice and sorghum genomes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "genome rearrangements; Maximum Weight Clique; rice; sorghum; synteny blocks", } @Article{Blin:2007:CGD, author = "Guillaume Blin and Cedric Chauve and Guillaume Fertin and Romeo Rizzi and Stephane Vialette", title = "Comparing Genomes with Duplications: a Computational Complexity Point of View", journal = j-TCBB, volume = "4", number = "4", pages = "523--534", month = oct, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:58:47 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In this paper, we are interested in the computational complexity of computing (dis)similarity measures between two genomes when they contain duplicated genes or genomic markers, a problem that happens frequently when comparing whole nuclear genomes. 
Recently, several methods ([1], [2]) have been proposed that are based on two steps to compute a given (dis)similarity measure $M$ between two genomes $ G_1 $ and $ G_2 $: first, one establishes a one-to-one correspondence between genes of $ G_1 $ and genes of $ G_2 $; second, once this correspondence is established, it defines explicitly a permutation and it is then possible to quantify their similarity using classical measures defined for permutations, like the number of breakpoints. Hence these methods rely on two elements: a way to establish a one-to-one correspondence between genes of a pair of genomes, and a (dis)similarity measure for permutations. The problem is then, given a (dis)similarity measure for permutations, to compute a correspondence that defines an optimal permutation for this measure. We are interested here in two models to compute a one-to-one correspondence: the exemplar model, where all but one copy are deleted in both genomes for each gene family, and the matching model, that computes a maximal correspondence for each gene family. 
We show that for these two models, and for three (dis)similarity measures on permutations, namely the number of common intervals, the maximum adjacency disruption (MAD) number and the summed adjacency disruption (SAD) number, the problem of computing an optimal correspondence is NP-complete, and even APX-hard for the MAD number and SAD number.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "common intervals; comparative genomics; computational complexity; maximum adjacency disruption number; summed adjacency disruption number", } @Article{Bonizzoni:2007:ELC, author = "Paola Bonizzoni and Gianluca Della Vedova and Riccardo Dondi and Guillaume Fertin and Raffaella Rizzi and Stephane Vialette", title = "Exemplar Longest Common Subsequence", journal = j-TCBB, volume = "4", number = "4", pages = "535--543", month = oct, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:58:47 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In this paper, we investigate the computational and approximation complexity of the Exemplar Longest Common Subsequence of a set of sequences (ELCS problem), a generalization of the Longest Common Subsequence problem, where the input sequences are over the union of two disjoint sets of symbols, a set of mandatory symbols and a set of optional symbols. We show that different versions of the problem are APX-hard even for instances with two sequences. Moreover, we show that the related problem of determining the existence of a feasible solution of the Exemplar Longest Common Subsequence of two sequences is NP-hard. 
On the positive side, we first present an efficient algorithm for the ELCS problem over instances of two sequences where each mandatory symbol can appear in total at most three times in the sequences. Furthermore, we present two fixed-parameter algorithms for the ELCS problem over instances of two sequences where the parameter is the number of mandatory symbols.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "algorithm design and analysis; analysis of algorithms and problem complexity; combinatorial algorithms; comparative genomics; longest common subsequence", } @Article{Davila:2007:FPA, author = "Jaime Davila and Sudha Balla and Sanguthevar Rajasekaran", title = "Fast and Practical Algorithms for Planted $ (l, d) $ Motif Search", journal = j-TCBB, volume = "4", number = "4", pages = "544--552", month = oct, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:58:47 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We consider the planted $ (l, d) $ motif search problem, which consists of finding a substring of length $l$ that occurs in a set of input sequences $ \{ s_1, \ldots {}, s_n \} $ with up to $d$ errors, a problem that arises from the need to find transcription factor-binding sites in genomic information. We propose a sequence of practical algorithms, which start based on the ideas considered in PMS1. These algorithms are exact, have little space requirements, and are able to tackle challenging instances with bigger $d$, taking less time in the instances reported solved by exact algorithms. 
In particular, one of the proposed algorithms, PMSprune, is able to solve the challenging instances, such as (17, 6) and (19, 7), which were not previously reported as solved in the literature.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "branch and bound algorithms; challenging instances; exact algorithms; planted motif search problem", } @Article{Schneider:2007:SDM, author = "Adrian Schneider and Gaston Gonnet and Gina Cannarozzi", title = "{SynPAM---A} Distance Measure Based on Synonymous Codon Substitutions", journal = j-TCBB, volume = "4", number = "4", pages = "553--560", month = oct, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:58:47 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Measuring evolutionary distances between DNA or protein sequences forms the basis of many applications in computational biology and evolutionary studies. Of particular interest are distances based on synonymous substitutions, since these substitutions are considered to be under very little selection pressure and therefore assumed to accumulate in an almost clock-like manner. SynPAM, the method presented here, allows the estimation of distances between coding DNA sequences based on synonymous codon substitutions. The problem of estimating an accurate distance from the observed substitution pattern is solved by maximum-likelihood with empirical codon substitution matrices employed for the underlying Markov model. 
Comparisons with established measures of synonymous distance indicate that SynPAM has less variance and yields useful results over a longer time range.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "dS; evolutionary distance; molecular evolution; synonymous substitutions; SynPAM", } @Article{Sridhar:2007:AEN, author = "Srinath Sridhar and Kedar Dhamdhere and Guy Blelloch and Eran Halperin and R. Ravi and Russell Schwartz", title = "Algorithms for Efficient Near-Perfect Phylogenetic Tree Reconstruction in Theory and Practice", journal = j-TCBB, volume = "4", number = "4", pages = "561--571", month = oct, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:58:47 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We consider the problem of reconstructing near-perfect phylogenetic trees using binary character states (referred to as BNPP). A perfect phylogeny assumes that every character mutates at most once in the evolutionary tree, yielding an algorithm for binary character states that is computationally efficient but not robust to imperfections in real data. A near-perfect phylogeny relaxes the perfect phylogeny assumption by allowing at most a constant number of additional mutations. We develop two algorithms for constructing optimal near-perfect phylogenies and provide empirical evidence of their performance. The first simple algorithm is fixed parameter tractable when the number of additional mutations and the number of characters that share four gametes with some other character are constants. The second, more involved algorithm for the problem is fixed parameter tractable when only the number of additional mutations is fixed. 
We have implemented both algorithms and shown them to be extremely efficient in practice on biologically significant data sets. This work proves the BNPP problem fixed parameter tractable and provides the first practical phylogenetic tree reconstruction algorithms that find guaranteed optimal solutions while being easily implemented and computationally feasible for data sets of biologically meaningful size and complexity.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "biology and genetics; computations on discrete structures; trees", } @Article{Chen:2007:CBR, author = "Jinmiao Chen and Narendra Chaudhari", title = "Cascaded Bidirectional Recurrent Neural Networks for Protein Secondary Structure Prediction", journal = j-TCBB, volume = "4", number = "4", pages = "572--582", month = oct, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:58:47 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Protein secondary structure (PSS) prediction is an important topic in bioinformatics. Our study on a large set of non-homologous proteins shows that long-range interactions commonly exist and negatively affect PSS prediction. Besides, we also reveal strong correlations between secondary structure (SS) elements. In order to take into account the long-range interactions and SS-SS correlations, we propose a novel prediction system based on cascaded bidirectional recurrent neural network (BRNN). We compare the cascaded BRNN against another two BRNN architectures, namely the original BRNN architecture used for speech recognition as well as Pollastri's BRNN that was proposed for PSS prediction. 
Our cascaded BRNN achieves an overall three state accuracy Q3 of 74.38\%, and reaches a high Segment OVerlap (SOV) of 66.0455. It outperforms the original BRNN and Pollastri's BRNN in both Q3 and SOV. Specifically, it improves the SOV score by 4--6\%.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Xiong:2007:DDK, author = "Huilin Xiong and Ya Zhang and Xue-Wen Chen", title = "Data-Dependent Kernel Machines for Microarray Data Classification", journal = j-TCBB, volume = "4", number = "4", pages = "583--595", month = oct, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:58:47 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "One important application of gene expression analysis is to classify tissue samples according to their gene expression levels. Gene expression data are typically characterized by high dimensionality and small sample size, which makes the classification task quite challenging. In this paper, we present a data-dependent kernel for microarray data classification. This kernel function is engineered so that the class separability of the training data is maximized. A bootstrapping-based resampling scheme is introduced to reduce the possible training bias. The effectiveness of this adaptive kernel for microarray data classification is illustrated with a k-Nearest Neighbor (KNN) classifier. Our experimental study shows that the data-dependent kernel leads to a significant improvement in the accuracy of KNN classifiers. 
Furthermore, this kernel-based KNN scheme has been demonstrated to be competitive to, if not better than, more sophisticated classifiers such as Support Vector Machines (SVMs) and the Uncorrelated Linear Discriminant Analysis (ULDA) for classifying gene expression data.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "bootstrapping resampling; cancer classification; kernel machines; kernel optimization; microarray data analysis", } @Article{Michal:2007:FCM, author = "Shahar Michal and Tor Ivry and Omer Cohen and Moshe Sipper and Danny Barash", title = "Finding a Common Motif of {RNA} Sequences Using Genetic Programming: The {GeRNAMo} System", journal = j-TCBB, volume = "4", number = "4", pages = "596--610", month = oct, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:58:47 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We focus on finding a consensus motif of a set of homologous or functionally related RNA molecules. Recent approaches to this problem have been limited to simple motifs, require sequence alignment, and make prior assumptions concerning the data set. We use genetic programming to predict RNA consensus motifs based solely on the data set. Our system -- dubbed GeRNAMo (Genetic programming of RNA Motifs) -- predicts the most common motifs without sequence alignment and is capable of dealing with any motif size. Our program only requires the maximum number of stems in the motif, and if prior knowledge is available the user can specify other attributes of the motif (e.g., the range of the motif's minimum and maximum sizes), thereby increasing both sensitivity and speed. 
We describe several experiments using either ferritin iron response element (IRE); signal recognition particle (SRP); or microRNA sequences, showing that the most common motif is found repeatedly, and that our system offers substantial advantages over previous methods.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{McIntosh:2007:HCR, author = "Tara McIntosh and Sanjay Chawla", title = "High Confidence Rule Mining for Microarray Analysis", journal = j-TCBB, volume = "4", number = "4", pages = "611--623", month = oct, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:58:47 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We present an association rule mining method for mining high confidence rules, which describe interesting gene relationships from microarray datasets. Microarray datasets typically contain an order of magnitude more genes than experiments, rendering many data mining methods impractical as they are optimised for sparse datasets. A new family of row-enumeration rule mining algorithms have emerged to facilitate mining in dense datasets. These algorithms rely on pruning infrequent relationships to reduce the search space by using the support measure. This major shortcoming results in the pruning of many potentially interesting rules with low support but high confidence. We propose a new row-enumeration rule mining method, MaxConf, to mine high confidence rules from microarray data. MaxConf is a support-free algorithm which directly uses the confidence measure to effectively prune the search space. Experiments on three microarray datasets show that MaxConf outperforms support-based rule mining with respect to scalability and rule extraction. 
Furthermore, detailed biological analyses demonstrate the effectiveness of our approach -- the rules discovered by MaxConf are substantially more interesting and meaningful compared with support-based methods.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "association rules; data mining; high confidence rule mining; microarray analysis", } @Article{Ponzoni:2007:IAR, author = "Ignacio Ponzoni and Francisco Azuaje and Juan Augusto and David Glass", title = "Inferring Adaptive Regulation Thresholds and Association Rules from Gene Expression Data through Combinatorial Optimization Learning", journal = j-TCBB, volume = "4", number = "4", pages = "624--634", month = oct, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:58:47 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "There is a need to design computational methods to support the prediction of gene regulatory networks. Such models should offer both biologically-meaningful and computationally-accurate predictions, which in combination with other techniques may improve large-scale, integrative studies. This paper presents a new machine learning method for the prediction of putative regulatory associations from expression data, which exhibit properties never or only partially addressed by other techniques recently published. The method was tested on a Saccharomyces cerevisiae gene expression dataset. The results were statistically validated and compared with the relationships inferred by two machine learning approaches to gene regulatory network prediction. Furthermore, the resulting predictions were assessed using domain knowledge. The proposed algorithm may be able to accurately predict relevant biological associations between genes. 
One of the most relevant features of this new method is the prediction of adaptive regulation thresholds for the discretization of gene expression values, which is required prior to the rule association learning process. Moreover, an important advantage consists of its low computational cost to infer association rules. The proposed system may significantly support exploratory, large-scale studies of automated identification of potentially-relevant gene expression associations.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "combinatorial optimization; decision trees; gene expression data; genetic regulatory networks; machine-learning", } @Article{Noman:2007:IGR, author = "Nasimul Noman and Hitoshi Iba", title = "Inferring Gene Regulatory Networks using Differential Evolution with Local Search Heuristics", journal = j-TCBB, volume = "4", number = "4", pages = "634--647", month = oct, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:58:47 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We present a memetic algorithm for evolving the structure of biomolecular interactions and inferring the effective kinetic parameters from the time series data of gene expression using the decoupled system formalism. We propose an Information Criteria based fitness evaluation for gene network model selection instead of the conventional Mean Squared Error (MSE) based fitness evaluation. A hill-climbing local-search method has been incorporated in our evolutionary algorithm for efficiently attaining the skeletal architecture which is most frequently observed in biological networks. 
The suitability of the method is tested in gene circuit reconstruction experiments, varying the network dimension and/or characteristics, the amount of gene expression data used for inference and the noise level present in expression profiles. The reconstruction method inferred the network topology and the regulatory parameters with high accuracy. Nevertheless, the performance is limited to the amount of expression data used and the noise level present in the data. The proposed fitness function has been found more suitable for identifying correct network topology and for estimating the accurate parameter values compared to the existing ones. Finally, we applied the methodology for analyzing the cell-cycle gene expression data of budding yeast and reconstructed the network of some key regulators.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "biology and genetics; gene regulatory system; global optimization; inverse problems; medicine and science; memetic algorithm; microarray data; transcriptional regulation", } @Article{Ho:2007:ITS, author = "Shinn-Ying Ho and Chih-Hung Hsieh and Fu-Chieh Yu and Hui-Ling Huang", title = "An Intelligent Two-Stage Evolutionary Algorithm for Dynamic Pathway Identification From Gene Expression Profiles", journal = j-TCBB, volume = "4", number = "4", pages = "648--660", month = oct, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:58:47 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "From gene expression profiles, it is desirable to rebuild cellular dynamic regulation networks to discover more delicate and substantial functions in molecular biology, biochemistry, bioengineering and pharmaceutics. 
S-system model is suitable to characterize biochemical network systems and capable to analyze the regulatory system dynamics. However, inference of an S-system model of $N$-gene genetic networks has $ 2 N (N + 1) $ parameters in a set of non-linear differential equations to be optimized. This paper proposes an intelligent two-stage evolutionary algorithm (iTEA) to efficiently infer the S-system models of genetic networks from time-series data of gene expression. To cope with curse of dimensionality, the proposed algorithm consists of two stages where each uses a divide-and-conquer strategy. The optimization problem is first decomposed into $N$ subproblems having $ 2 (N + 1) $ parameters each. At the first stage, each subproblem is solved using a novel intelligent genetic algorithm (IGA) with intelligent crossover based on orthogonal experimental design (OED). At the second stage, the obtained $N$ solutions to the $N$ subproblems are combined and refined using an OED-based simulated annealing algorithm for handling noisy gene expression profiles. The effectiveness of iTEA is evaluated using simulated expression patterns with and without noise running on a single-processor PC. 
It is shown that (1) IGA is efficient enough to solve subproblems; (2) IGA is significantly superior to the existing method SPXGA; and (3) iTEA performs well in inferring S-system models for dynamic pathway identification.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "divide-and-conquer; evolutionary algorithm; genetic network; orthogonal experimental design; pathway identification; S-system model", } @Article{Bereg:2007:PNB, author = "Sergey Bereg and Yuanyi Zhang", title = "Phylogenetic Networks Based on the Molecular Clock Hypothesis", journal = j-TCBB, volume = "4", number = "4", pages = "661--667", month = oct, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:58:47 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A classical result in phylogenetic trees is that a binary phylogenetic tree adhering to the molecular clock hypothesis exists if and only if the matrix of distances between taxa is ultrametric. The ultrametric condition is very restrictive. In this paper we study phylogenetic networks that can be constructed assuming the molecular clock hypothesis. We characterize distance matrices that admit such networks for 3 and 4 taxa. 
We also design two algorithms for constructing networks optimizing the least-squares fit.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "least-squares fit; molecular clock hypothesis; Phylogenetic Networks", } @Article{Blazewicz:2007:SPD, author = "Jacek Blazewicz and Edmund Burke and Marta Kasprzak and Alexandr Kovalev and Mikhail Kovalyov", title = "Simplified Partial Digest Problem: Enumerative and Dynamic Programming Algorithms", journal = j-TCBB, volume = "4", number = "4", pages = "668--680", month = oct, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:58:47 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We study the Simplified Partial Digest Problem (SPDP), which is a mathematical model for a new simplified partial digest method of genome mapping. This method is easy for laboratory implementation and robust with respect to the experimental errors. SPDP is NP-hard in the strong sense. We present an $ O(n2^n) $ time enumerative algorithm and an $ O(n^{2q}) $ time dynamic programming algorithm for the error-free SPDP, where $n$ is the number of restriction sites and $q$ is the number of distinct intersite distances. We also give examples of the problem, in which there are $ 2^{\frac {n + 2}{3} - 1} $ non-congruent solutions. These examples partially answer a question recently posed in the literature about the number of solutions of SPDP. We adapt our enumerative algorithm for handling SPDP with imprecise input data. 
Finally, we describe and discuss the results of the computer experiments with our algorithms.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "algorithm design and analysis; dynamic programming; genome mapping; imprecise information; restriction site analysis", } @Article{Xu:2007:IGR, author = "Rui Xu and Donald {Wunsch II} and Ronald Frank", title = "Inference of Genetic Regulatory Networks with Recurrent Neural Network Models Using Particle Swarm Optimization", journal = j-TCBB, volume = "4", number = "4", pages = "681--692", month = oct, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:58:47 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Genetic regulatory network inference is critically important for revealing fundamental cellular processes, investigating gene functions, and understanding their relations. The availability of time series gene expression data makes it possible to investigate the gene activities of whole genomes, rather than those of only a pair of genes or among several genes. However, current computational methods do not sufficiently consider the temporal behavior of this type of data and lack the capability to capture the complex nonlinear system dynamics. We propose a recurrent neural network (RNN) and particle swarm optimization (PSO) approach to infer genetic regulatory networks from time series gene expression data. Under this framework, gene interaction is explained through a connection weight matrix. 
Based on the fact that the measured time points are limited and the assumption that the genetic networks are usually sparsely connected, we present a PSO-based search algorithm to unveil potential genetic network constructions that fit well with the time series data and explore possible gene interactions. Furthermore, PSO is used to train the RNN and determine the network parameters. Our approach has been applied to both synthetic and real data sets. The results demonstrate that the RNN\slash PSO can provide meaningful insights in understanding the nonlinear dynamics of the gene expression time series and revealing potential regulatory interactions between genes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "genetic regulatory networks; particle swarm optimization; recurrent neural networks; time series gene expression data", } @Article{Agius:2007:TSA, author = "Phaedra Agius and Barry Kreiswirth and Steve Naidich and Kristin Bennett", title = "Typing \bioname{Staphylococcus aureus} Using the spa Gene and Novel Distance Measures", journal = j-TCBB, volume = "4", number = "4", pages = "693--704", month = oct, year = "2007", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:58:47 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We developed an approach for identifying groups or families of Staphylococcus aureus bacteria based on genotype data. With the emergence of drug resistant strains, \bioname{S. aureus} represents a significant human health threat. Identifying the family types efficiently and quickly is crucial in community settings. Here, we develop a hybrid sequence algorithm approach to type this bacterium using only its spa gene. 
Two of the sequence algorithms we used are well established, while the third, the Best Common Gap-Weighted Sequence (BCGS), is novel. We combined the sequence algorithms with a weighted match/mismatch algorithm for the spa sequence ends. Normalized similarity scores and distances between the sequences were derived and used within unsupervised clustering methods. The resulting spa groupings correlated strongly with the groups defined by the well-established Multi locus sequence typing (MLST) method. Spa typing is preferable to MLST typing which types seven genes instead of just one. Furthermore, our spa clustering methods can be fine-tuned to be more discriminative than MLST, identifying new strains that the MLST method may not. Finally, we performed a multidimensional scaling of our distance matrices to visualize the relationship between isolates. The proposed methodology provides a promising new approach to molecular epidemiology.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "clustering; genotyping; molecular epidemiology; sequence algorithms; staphylococcus aureus", } @Article{Congdon:2008:EIC, author = "Clare Bates Congdon and Joseph C. Aman and Gerardo M. Nava and H. Rex Gaskins and Carolyn J. Mattingly", title = "An Evaluation of Information Content as a Metric for the Inference of Putative Conserved Noncoding Regions in {DNA} Sequences Using a Genetic Algorithms Approach", journal = j-TCBB, volume = "5", number = "1", pages = "1--14", month = jan, year = "2008", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:59:11 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In previous work, we presented GAMI [1], an approach to motif inference that uses a genetic algorithms search. 
GAMI is designed specifically to find putative conserved regulatory motifs in noncoding regions of divergent species, and is designed to allow for analysis of long nucleotide sequences. In this work, we compare GAMI's performance when run with its original fitness function (a simple count of the number of matches) and when run with information content, as well as several variations on these metrics. Results indicate that information content does not identify highly conserved regions, and thus is not the appropriate metric for this task, while variations on information content as well as the original metric succeed in identifying putative conserved regions.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "biology and genetics; evolutionary computing and genetic algorithms", } @Article{Boscolo:2008:ITE, author = "Riccardo Boscolo and James C. Liao and Vwani P. Roychowdhury", title = "An Information Theoretic Exploratory Method for Learning Patterns of Conditional Gene Coexpression from Microarray Data", journal = j-TCBB, volume = "5", number = "1", pages = "15--24", month = jan, year = "2008", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:59:11 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In this article, we introduce an exploratory framework for learning patterns of conditional co-expression in gene expression data. The main idea behind the proposed approach consists of estimating how the information content shared by a set of $M$ nodes in a network (where each node is associated to an expression profile) varies upon conditioning on a set of $L$ conditioning variables (in the simplest case represented by a separate set of expression profiles).
The method is non-parametric and it is based on the concept of statistical co-information, which, unlike conventional correlation based techniques, is not restricted in scope to linear conditional dependency patterns. Moreover, such conditional co-expression relationships can potentially indicate regulatory interactions that do not manifest themselves when only pair-wise relationships are considered. A moment based approximation of the co-information measure is derived that efficiently gets around the problem of estimating high-dimensional multi-variate probability density functions from the data, a task usually not viable due to the intrinsic sample size limitations that characterize expression level measurements. By applying the proposed exploratory method, we analyzed a whole genome microarray assay of the eukaryote \bioname{Saccharomyces cerevisiae} and were able to learn statistically significant patterns of conditional co-expression. A selection of such interactions that carry a meaningful biological interpretation are discussed.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "co-information; entropy; gene expression data; information theory; statistical analysis", } @Article{Wiese:2008:REA, author = "Kay C. Wiese and Alain A. Deschenes and Andrew G. Hendriks", title = "{RnaPredict---An} Evolutionary Algorithm for {RNA} Secondary Structure Prediction", journal = j-TCBB, volume = "5", number = "1", pages = "25--41", month = jan, year = "2008", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:59:11 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This paper presents two in-depth studies on RnaPredict, an evolutionary algorithm for RNA secondary structure prediction.
The first study is an analysis of the performance of two thermodynamic models, INN and INN-HB. The correlation between the free energy of predicted structures and the sensitivity is analyzed for 19 RNA sequences. Although some variance is shown, there is a clear trend between a lower free energy and an increase in true positive base pairs. With increasing sequence length, this correlation generally decreases. In the second experiment, the accuracy of the predicted structures for these 19 sequences are compared against the accuracy of the structures generated by the mfold dynamic programming algorithm (DPA) and also to known structures. RnaPredict is shown to outperform the minimum free energy structures produced by mfold and has comparable performance when compared to sub-optimal structures produced by mfold.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "evolutionary computation; RNA secondary structure prediction; RnaPredict", } @Article{Rother:2008:SCP, author = "Diego Rother and Guillermo Sapiro and Vijay Pande", title = "Statistical Characterization of Protein Ensembles", journal = j-TCBB, volume = "5", number = "1", pages = "42--55", month = jan, year = "2008", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:59:11 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "When accounting for structural fluctuations or measurement errors, a single rigid structure may not be sufficient to represent a protein. One approach to solve this problem is to represent the possible conformations as a discrete set of observed conformations, an ensemble. 
In this work, we follow a different richer approach, and introduce a framework for estimating probability density functions in very high dimensions, and then apply it to represent ensembles of folded proteins. This proposed approach combines techniques such as kernel density estimation, maximum likelihood, cross-validation, and bootstrapping. We present the underlying theoretical and computational framework and apply it to artificial data and protein ensembles obtained from molecular dynamics simulations. We compare the results with those obtained experimentally, illustrating the potential and advantages of this representation.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "Bayesian networks; bootstrapping; cross-validation; density estimation; graphical models; maximum likelihood; protein ensembles", } @Article{Cui:2008:AAU, author = "Yun Cui and Lusheng Wang and Daming Zhu and Xiaowen Liu", title = "A $ (1.5 + {\epsilon }) $-Approximation Algorithm for Unsigned Translocation Distance", journal = j-TCBB, volume = "5", number = "1", pages = "56--66", month = jan, year = "2008", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:59:11 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Genome rearrangement is an important area in computational biology and bioinformatics. The translocation operation is one of the popular operations for genome rearrangement. It was proved that computing the unsigned translocation distance is NP-hard. In this paper, we present a $ (1.5 + \epsilon) $-approximation algorithm for computing unsigned translocation distance which improves upon the best known 1.75-ratio. 
The running time of our algorithm is $ O(n^2 + (4 / \epsilon)^{1.5} \sqrt{\log (4 / \epsilon)} \, 2^{4 / \epsilon}) $, where $n$ is the total number of genes in the genome.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "approximation algorithms; genome rearrangement; unsigned translocation", } @Article{Tan:2008:NBP, author = "Tuan Zea Tan and Geok See Ng and Chai Quek", title = "A Novel Biologically and Psychologically Inspired Fuzzy Decision Support System: Hierarchical Complementary Learning", journal = j-TCBB, volume = "5", number = "1", pages = "67--79", month = jan, year = "2008", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:59:11 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A computational intelligent system that models the human cognitive abilities may promise significant performance in problem learning because human is effective in learning and problem solving. Functionally modeling the human cognitive abilities not only avoids the details of the underlying neural mechanisms performing the tasks, but also reduces the complexity of the system. The complementary learning mechanism is responsible for human pattern recognition, i.e. human attends to positive and negative samples when making decision. Furthermore, human concept learning is organized in a hierarchical fashion. Such hierarchical organization allows the divide-and-conquer approach to the problem. Thus, integrating the functional models of hierarchical organization and complementary learning can potentially improve the performance in pattern recognition. Hierarchical complementary learning exhibits many of the desirable features of pattern recognition.
It is further supported by the experimental results that verify the rationale of the integration and that the hierarchical complementary learning system is a promising pattern recognition tool.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "cognitive learning; complementary learning; decision support; fuzzy neural network; hierarchical model", } @Article{Ciocchetta:2008:ATS, author = "Federica Ciocchetta and Corrado Priami and Paola Quaglia", title = "An Automatic Translation of {SBML} into Beta-Binders", journal = j-TCBB, volume = "5", number = "1", pages = "80--90", month = jan, year = "2008", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:59:11 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A translation of SBML (Systems Biology Markup Language) into a process algebra is proposed in order to allow the formal specification, the simulation and the formal analysis of biological models. Beta-binders, a language with a quantitative stochastic extension, is chosen for the translation. The proposed translation focuses on the main components of SBML models, as species and reactions. Furthermore, it satisfies the compositional property, i.e. the translation of the whole model is obtained by composing the translation of the subcomponents. An automatic translator tool of SBML models into Beta-binders has been implemented as well. 
Finally, the translation of a simple model is reported.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "biological systems; modeling; Process algebras; systems biology; Systems Biology Markup Language (SBML); translation tool", } @Article{Bocker:2008:CAM, author = "Sebastian Bocker and Veli Makinen", title = "Combinatorial Approaches for Mass Spectra Recalibration", journal = j-TCBB, volume = "5", number = "1", pages = "91--100", month = jan, year = "2008", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:59:11 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Mass spectrometry has become one of the most popular analysis techniques in Proteomics and Systems Biology. With the creation of larger datasets, the automated recalibration of mass spectra becomes important to ensure that every peak in the sample spectrum is correctly assigned to some peptide and protein. Algorithms for recalibrating mass spectra have to be robust with respect to wrongly assigned peaks, as well as efficient due to the amount of mass spectrometry data. The recalibration of mass spectra leads us to the problem of finding an optimal matching between mass spectra under measurement errors. We have developed two deterministic methods that allow robust computation of such a matching: The first approach uses a computational geometry interpretation of the problem, and tries to find two parallel lines with constant distance that stab a maximal number of points in the plane. The second approach is based on finding a maximal common approximate subsequence, and improves existing algorithms by one order of magnitude exploiting the sequential nature of the matching problem. 
We compare our results to a computational geometry algorithm using a topological line-sweep.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "biotechnology; combinatorial pattern matching; computational geometry; mass spectrometry", } @Article{Barzuza:2008:CPP, author = "Tamar Barzuza and Jacques S. Beckmann and Ron Shamir and Itsik Pe'er", title = "Computational Problems in Perfect Phylogeny Haplotyping: Typing without Calling the Allele", journal = j-TCBB, volume = "5", number = "1", pages = "101--109", month = jan, year = "2008", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:59:11 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A haplotype is an m-long binary vector. The xor-genotype of two haplotypes is the m-vector of their coordinate-wise xor. We study the following problem: Given a set of xor-genotypes, reconstruct their haplotypes so that the set of resulting haplotypes can be mapped onto a perfect phylogeny tree. The question is motivated by studying population evolution in human genetics, and is a variant of the perfect phylogeny haplotyping problem that has received intensive attention recently. Unlike the latter problem, in which the input is `full' genotypes, here we assume less informative input, and so may be more economical to obtain experimentally. Building on ideas of Gusfield, we show how to solve the problem in polynomial time, by a reduction to the graph realization problem. The actual haplotypes are not uniquely determined by that tree they map onto, and the tree itself may or may not be unique. We show that tree uniqueness implies uniquely determined haplotypes, up to inherent degrees of freedom, and give a sufficient condition for the uniqueness. 
To actually determine the haplotypes given the tree, additional information is necessary. We show that two or three full genotypes suffice to reconstruct all the haplotypes, and present a linear algorithm for identifying those genotypes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "graph realization; haplotypes; perfect phylogeny; XOR-genotypes", } @Article{Chin:2008:DMR, author = "Francis Chin and Henry C. M. Leung", title = "{DNA} Motif Representation with Nucleotide Dependency", journal = j-TCBB, volume = "5", number = "1", pages = "110--119", month = jan, year = "2008", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:59:11 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The problem of discovering novel motifs of binding sites is important to the understanding of gene regulatory networks. Motifs are generally represented by matrices (PWM or PSSM) or strings. However, these representations cannot model biological binding sites well because they fail to capture nucleotide interdependence. It has been pointed out by many researchers that the nucleotides of the DNA binding site cannot be treated independently, e.g. the binding sites of zinc finger in proteins. In this paper, a new representation called Scored Position Specific Pattern (SPSP), which is a generalization of the matrix and string representations, is introduced which takes into consideration the dependent occurrences of neighboring nucleotides.
Even though the problem of discovering the optimal motif in SPSP representation is proved to be NP-hard, we introduce a heuristic algorithm called SPSP-Finder, which can effectively find optimal motifs in most simulated cases and some real cases for which existing popular motif finding software, such as Weeder, MEME and AlignACE, fail.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "Computing methodologies; design methodology; pattern analysis; pattern recognition", } @Article{Yin:2008:NAC, author = "Zong-Xian Yin and Jung-Hsien Chiang", title = "Novel Algorithm for Coexpression Detection in Time-Varying Microarray Data Sets", journal = j-TCBB, volume = "5", number = "1", pages = "120--135", month = jan, year = "2008", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:59:11 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "When analyzing the results of microarray experiments, biologists generally use unsupervised categorization tools. However, such tools regard each time point as an independent dimension and utilize the Euclidean distance to compute the similarities between expressions. Furthermore, some of these methods require the number of clusters to be determined in advance, which is clearly impossible in the case of a new dataset. Therefore, this study proposes a novel scheme, designated as the Variation-based Co-expression Detection (VCD) algorithm, to analyze the trends of expressions based on their variation over time. The proposed algorithm has two advantages. First, it is unnecessary to determine the number of clusters in advance since the algorithm automatically detects those genes whose profiles are grouped together and creates patterns for these groups.
Second, the algorithm features a new measurement criterion for calculating the degree of change of the expressions between adjacent time points and evaluating their trend similarities. Three real-world microarray datasets are employed to evaluate the performance of the proposed algorithm.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "bioinformatics; clustering; data mining; gene expression; pattern analysis; time series analysis", } @Article{Goeffon:2008:PTN, author = "Adrien Goeffon and Jean-Michel Richer and Jin-Kao Hao", title = "Progressive Tree Neighborhood Applied to the Maximum Parsimony Problem", journal = j-TCBB, volume = "5", number = "1", pages = "136--145", month = jan, year = "2008", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:59:11 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The Maximum Parsimony problem aims at reconstructing a phylogenetic tree from DNA sequences while minimizing the number of genetic transformations. To solve this NP-complete problem, heuristic methods have been developed, often based on local search. In this article, we focus on the influence of the neighborhood relations. After analyzing the advantages and drawbacks of the well-known NNI, SPR and TBR neighborhoods, we introduce the concept of Progressive Neighborhood which consists in constraining progressively the size of the neighborhood as the search advances. We empirically show that applied to the Maximum Parsimony problem, this progressive neighborhood turns out to be more efficient and robust than the classic neighborhoods using a descent algorithm. 
Indeed, it allows to find better solutions with a smaller number of iterations or trees evaluated.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "combinatorial algorithms; maximum parsimony; optimization; phylogeny reconstruction", } @Article{Anonymous:2008:RL, author = "Anonymous", title = "2007 Reviewers List", journal = j-TCBB, volume = "5", number = "1", pages = "146--147", month = jan, year = "2008", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:59:11 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Anonymous:2008:AI, author = "Anonymous", title = "2007 Annual Index", journal = j-TCBB, volume = "5", number = "1", pages = "148--158", month = jan, year = "2008", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:59:11 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Anonymous:2008:CAE, author = "Anonymous", title = "Call for Applications for {Editor-in-Chief}", journal = j-TCBB, volume = "5", number = "1", pages = "159--159", month = jan, year = "2008", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:59:11 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on 
Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Jackson:2008:CGM, author = "Benjamin N. Jackson and Patrick S. Schnable and Srinivas Aluru", title = "Consensus Genetic Maps as Median Orders from Inconsistent Sources", journal = j-TCBB, volume = "5", number = "2", pages = "161--171", month = apr, year = "2008", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:59:29 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A genetic map is an ordering of genetic markers calculated from a population of known lineage. While traditionally a map has been generated from a single population for each species, recently researchers have created maps from multiple populations. In the face of these new data, we address the need to find a consensus map --- a map that combines the information from multiple partial and possibly inconsistent input maps. We model each input map as a partial order and formulate the consensus problem as finding a median partial order. Finding the median of multiple total orders (preferences or rankings) is a well studied problem in social choice. We choose to find the median using the weighted symmetric difference distance, a more general version of both the symmetric difference distance and the Kemeny distance. Finding a median order using this distance is NP-hard. We show that for our chosen weight assignment, a median order satisfies the positive responsiveness, extended Condorcet, and unanimity criteria. Our solution involves finding the maximum acyclic subgraph of a weighted directed graph. We present a method that dynamically switches between an exact branch and bound algorithm and a heuristic algorithm, and show that for real data from closely related organisms, an exact median can often be found.
We present experimental results using seven populations of the crop plant \bioname{Zea mays}.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "genetic map; Kemeny distance; median order; path and circuit problems; symmetric difference distance", } @Article{Gupta:2008:EDS, author = "Anupam Gupta and Ziv Bar-Joseph", title = "Extracting Dynamics from Static Cancer Expression Data", journal = j-TCBB, volume = "5", number = "2", pages = "172--182", month = apr, year = "2008", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:59:29 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Static expression experiments analyze samples from many individuals. These samples are often snapshots of the progression of a certain disease such as cancer. This raises an intriguing question: Can we determine a temporal order for these samples? Such an ordering can lead to better understanding of the dynamics of the disease and to the identification of genes associated with its progression. In this paper we formally prove, for the first time, that under a model for the dynamics of the expression levels of a single gene, it is indeed possible to recover the correct ordering of the static expression datasets by solving an instance of the traveling salesman problem (TSP). In addition, we devise an algorithm that combines a TSP heuristic and probabilistic modeling for inferring the underlying temporal order of the microarray experiments. This algorithm constructs probabilistic continuous curves to represent expression profiles leading to accurate temporal reconstruction for human data. Applying our method to cancer expression data we show that the ordering derived agrees well with survival duration.
A classifier that utilizes this ordering improves upon other classifiers suggested for this task. The set of genes displaying consistent behavior for the determined ordering are enriched for genes associated with cancer progression.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "EM; glioma; microarrays; traveling salesman", } @Article{Thomas:2008:GMR, author = "John Thomas and Naren Ramakrishnan and Chris Bailey-Kellogg", title = "Graphical Models of Residue Coupling in Protein Families", journal = j-TCBB, volume = "5", number = "2", pages = "183--197", month = apr, year = "2008", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:59:29 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Many statistical measures and algorithmic techniques have been proposed for studying residue coupling in protein families. Generally speaking, two residue positions are considered coupled if, in the sequence record, some of their amino acid type combinations are significantly more common than others. While the proposed approaches have proven useful in finding and describing coupling, a significant missing component is a formal probabilistic model that explicates and compactly represents the coupling, integrates information about sequence, structure, and function, and supports inferential procedures for analysis, diagnosis, and prediction. We present an approach to learning and using probabilistic graphical models of residue coupling. These models capture significant conservation and coupling constraints observable in a multiply-aligned set of sequences. Our approach can place a structural prior on considered couplings, so that all identified relationships have direct mechanistic explanations.
It can also incorporate information about functional classes, and thereby learn a differential graphical model that distinguishes constraints common to all classes from those unique to individual classes. Such differential models separately account for class-specific conservation and family-wide coupling, two different sources of sequence covariation. They are then able to perform interpretable functional classification of new sequences, explaining classification decisions in terms of the underlying conservation and coupling constraints. We apply our approach in studies of both G protein-coupled receptors and PDZ domains, identifying and analyzing family-wide and class-specific constraints, and performing functional classification. The results demonstrate that graphical models of residue coupling provide a powerful tool for uncovering, representing, and utilizing significant sequence structure-function relationships in protein families.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "correlated mutations; evolutionary covariation; functional classification; graphical models; sequence-structure-function relationships", } @Article{Mena-Chalco:2008:IPC, author = "Jesus Mena-Chalco and Helaine Carrer and Yossi Zana and Roberto M. {Cesar Jr.}", title = "Identification of Protein Coding Regions Using the Modified {Gabor}-Wavelet Transform", journal = j-TCBB, volume = "5", number = "2", pages = "198--207", month = apr, year = "2008", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:59:29 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "An important topic in genomic sequence analysis is the identification of protein coding regions. 
In this context, several coding DNA model-independent methods, based on the occurrence of specific patterns of nucleotides at coding regions, have been proposed. Nonetheless, these methods have not been completely suitable due to their dependence on an empirically pre-defined window length required for a local analysis of a DNA region. We introduce a method, based on a modified Gabor-wavelet transform (MGWT), for the identification of protein coding regions. This novel transform is tuned to analyze periodic signal components and presents the advantage of being independent of the window length. We compared the performance of the MGWT with other methods using eukaryote datasets. The results show that the MGWT outperforms all assessed model-independent methods with respect to identification accuracy. These results indicate that the source of at least part of the identification errors produced by the previous methods is the fixed working scale. The new method not only avoids this source of errors, but also makes available a tool for detailed exploration of the nucleotide occurrence.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "biology and genetics; pattern recognition; signal processing", } @Article{deJong:2008:SSS, author = "Hidde de Jong and Michel Page", title = "Search for Steady States of Piecewise-Linear Differential Equation Models of Genetic Regulatory Networks", journal = j-TCBB, volume = "5", number = "2", pages = "208--222", month = apr, year = "2008", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:59:29 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Analysis of the attractors of a genetic regulatory network gives a good indication of the possible functional modes of the system. 
In this paper we are concerned with the problem of finding all steady states of genetic regulatory networks described by piecewise-linear differential equation (PLDE) models. We show that the problem is NP-hard and translate it into a propositional satisfiability (SAT) problem. This allows the use of existing, efficient SAT solvers and has enabled the development of a steady state search module of the computer tool Genetic Network Analyzer (GNA). The practical use of this module is demonstrated by means of the analysis of a number of relatively small bacterial regulatory networks as well as randomly generated networks of several hundreds of genes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "genetic regulatory networks; large-scale systems; piecewise-linear differential equations; SAT problem; steady states", } @Article{Sadot:2008:TVB, author = "Avital Sadot and Jasmin Fisher and Dan Barak and Yishai Admanit and Michael J. Stern and E. Jane Albert Hubbard and David Harel", title = "Toward Verified Biological Models", journal = j-TCBB, volume = "5", number = "2", pages = "223--234", month = apr, year = "2008", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:59:29 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The last several decades have witnessed a vast accumulation of biological data and data analysis. Many of these data sets represent only a small fraction of the system's behavior, making the visualization of full system behavior difficult. A more complete understanding of a biological system is gained when different types of data (and/or conclusions drawn from the data) are integrated into a larger-scale representation or model of the system. 
Ideally, this type of model is consistent with all available data about the system, and it is then used to generate additional hypotheses to be tested. Computer-based methods intended to formulate models that integrate various events and to test the consistency of these models with respect to the laboratory-based observations on which they are based are potentially very useful. In addition, in contrast to informal models, the consistency of such formal computer-based models with laboratory data can be tested rigorously by methods of formal verification. We combined two formal modeling approaches in computer science that were originally developed for non-biological system design. One is the inter-object approach using the language of live sequence charts (LSCs) with the Play-Engine tool, and the other is the intra-object approach using the language of statecharts and Rhapsody as the tool. Integration is carried out using InterPlay, a simulation engine coordinator. Using these tools, we constructed a combined model comprising three modules. One module represents the early lineage of the somatic gonad of \bioname{C. elegans} in LSCs, while a second more detailed module in statecharts represents an interaction between two cells within this lineage that determine their developmental outcome. Using the advantages of the tools, we created a third module representing a set of key experimental data using LSCs. We tested the combined statechart-LSC model by showing that the simulations were consistent with the set of experimental LSCs. This small-scale modular example demonstrates the potential for using similar approaches for verification by exhaustive testing of models by LSCs. It also shows the advantages of these approaches for modeling biology.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "C. 
elegans; modeling; statecharts; verification", } @Article{Spillner:2008:CPD, author = "Andreas Spillner and Binh T. Nguyen and Vincent Moulton", title = "Computing Phylogenetic Diversity for Split Systems", journal = j-TCBB, volume = "5", number = "2", pages = "235--244", month = apr, year = "2008", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:59:29 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In conservation biology it is a central problem to measure, predict, and preserve biodiversity as species face extinction. In 1992 Faith proposed measuring the diversity of a collection of species in terms of their relationships on a phylogenetic tree, and to use this information to identify collections of species with high diversity. Here we are interested in some variants of the resulting optimization problem that arise when considering species whose evolution is better represented by a network rather than a tree. More specifically, we consider the problem of computing phylogenetic diversity relative to a split system on a collection of species of size $n$. We show that for general split systems this problem is NP-hard. In addition we provide some efficient algorithms for some special classes of split systems, in particular presenting an optimal $ O(n) $ time algorithm for phylogenetic trees and an $ O(n \log n + n k) $ time algorithm for choosing an optimal subset of size $k$ relative to a circular split system.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "biology and genetics; life and medical sciences", } @Article{Lancia:2008:HDA, author = "Giuseppe Lancia and R. 
Ravi and Romeo Rizzi", title = "Haplotyping for Disease Association: a Combinatorial Approach", journal = j-TCBB, volume = "5", number = "2", pages = "245--251", month = apr, year = "2008", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:59:29 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We consider a combinatorial problem derived from haplotyping a population with respect to a genetic disease, either recessive or dominant. Given a set of individuals, partitioned into healthy and diseased, and the corresponding sets of genotypes, we want to infer ``bad'' and ``good'' haplotypes to account for these genotypes and for the disease. Assume e.g. the disease is recessive. Then, the resolving haplotypes must consist of {\em bad\/} and {\em good\/} haplotypes, so that (i) each genotype belonging to a diseased individual is explained by a pair of bad haplotypes and (ii) each genotype belonging to a healthy individual is explained by a pair of haplotypes of which at least one is good. We prove that the associated decision problem is NP-complete. However, we also prove that there is a simple solution, provided the data satisfy a very weak requirement.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "biology and genetics; combinatorics; discrete mathematics", } @Article{Gusev:2008:HSG, author = "Alexander Gusev and Ion I. 
M{\~a}ndoiu and Bogdan Pa{\c{s}}aniuc", title = "Highly Scalable Genotype Phasing by Entropy Minimization", journal = j-TCBB, volume = "5", number = "2", pages = "252--261", month = apr, year = "2008", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:59:29 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A Single Nucleotide Polymorphism (SNP) is a position in the genome at which two or more of the possible four nucleotides occur in a large percentage of the population. SNPs account for most of the genetic variability between individuals, and mapping SNPs in the human population has become the next high-priority in genomics after the completion of the Human Genome project. In diploid organisms such as humans, there are two non-identical copies of each autosomal chromosome. A description of the SNPs in a chromosome is called a haplotype. At present, it is prohibitively expensive to directly determine the haplotypes of an individual, but it is possible to obtain rather easily the conflated SNP information in the so called genotype. Computational methods for genotype phasing, i.e., inferring haplotypes from genotype data, have received much attention in recent years as haplotype information leads to increased statistical power of disease association tests. However, many of the existing algorithms have impractical running time for phasing large genotype datasets such as those generated by the international HapMap project. In this paper we propose a highly scalable algorithm based on entropy minimization. Our algorithm is capable of phasing both unrelated and related genotypes coming from complex pedigrees. Experimental results on both real and simulated datasets show that our algorithm achieves a phasing accuracy worse but close to that of best existing methods while being several orders of magnitude faster. 
The open source code implementation of the algorithm and a web interface are publicly available at \path=http://dna.engr.uconn.edu/~software/ent/=.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "algorithm; genotype phasing; haplotype; Single Nucleotide Polymorphism", } @Article{Zhao:2008:ICG, author = "Wentao Zhao and Erchin Serpedin and Edward R. Dougherty", title = "Inferring Connectivity of Genetic Regulatory Networks Using Information-Theoretic Criteria", journal = j-TCBB, volume = "5", number = "2", pages = "262--274", month = apr, year = "2008", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:59:29 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Recently, the concept of mutual information has been proposed for inferring the structure of genetic regulatory networks from gene expression profiling. After analyzing the limitations of mutual information in inferring the gene-to-gene interactions, this paper introduces the concept of conditional mutual information and based on it proposes two novel algorithms to infer the connectivity structure of genetic regulatory networks. One of the proposed algorithms exhibits a better accuracy while the other algorithm excels in simplicity and flexibility. By exploiting the mutual information and conditional mutual information, a practical metric is also proposed to assess the likeliness of direct connectivity between genes. This novel metric resolves a common limitation associated with the current inference algorithms, namely the situations where the gene connectivity is established in terms of the dichotomy of being either connected or disconnected. 
Based on the data sets generated by synthetic networks, the performance of the proposed algorithms is compared favorably relative to existing state-of-the-art schemes. The proposed algorithms are also applied on realistic biological measurements, such as the cutaneous melanoma data set, and biologically meaningful results are inferred.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "biology and genetics; DNA microarray; genetic regulatory network; information theory", } @Article{Bordewich:2008:NRS, author = "Magnus Bordewich and Charles Semple", title = "Nature Reserve Selection Problem: a Tight Approximation Algorithm", journal = j-TCBB, volume = "5", number = "2", pages = "275--280", month = apr, year = "2008", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:59:29 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The Nature Reserve Selection Problem is a problem that arises in the context of studying biodiversity conservation. Subject to budgetary constraints, the problem is to select a set of regions to conserve so that the phylogenetic diversity of the set of species contained within those regions is maximized. Recently, it was shown in a paper by Moulton {\em et al.} that this problem is NP-hard. In this paper, we establish a tight polynomial-time approximation algorithm for the Nature Reserve Selection Problem. 
Furthermore, we resolve a question on the computational complexity of a related problem left open in Moulton {\em et al.}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "combinatorial algorithms; trees", } @Article{Hsieh:2008:OAI, author = "Yong-Hsiang Hsieh and Chih-Chiang Yu and Biing-Feng Wang", title = "Optimal Algorithms for the Interval Location Problem with Range Constraints on Length and Average", journal = j-TCBB, volume = "5", number = "2", pages = "281--290", month = apr, year = "2008", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:59:29 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Let $A$ be a sequence of $n$ real numbers, $ L_1 $ and $ L_2 $ be two integers such that $ L_1 \leq L_2 $, and $ R_1 $ and $ R_2 $ be two real numbers such that $ R_1 \leq R_2 $. An interval of $A$ is feasible if its length is between $ L_1 $ and $ L_2 $ and its average is between $ R_1 $ and $ R_2 $. In this paper, we study the following problems: finding all feasible intervals of $A$, counting all feasible intervals of $A$, finding a maximum cardinality set of non-overlapping feasible intervals of $A$, locating a longest feasible interval of $A$, and locating a shortest feasible interval of $A$. The problems are motivated from the problem of locating CpG islands in biomolecular sequences. In this paper, we firstly show that all the problems have an $ \Omega (n \log n) $-time lower bound in the comparison model. Then, we use geometric approaches to design optimal algorithms for the problems. 
All the presented algorithms run in an on-line manner and use $ O(n) $ space.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "algorithms; analysis of algorithms; data structures; geometrical problems and computations", } @Article{Lamers:2008:PRX, author = "Susanna L. Lamers and Marco Salemi and Michael S. McGrath and Gary B. Fogel", title = "Prediction of {R5}, {X4}, and {R5X4} {HIV}-1 Coreceptor Usage with Evolved Neural Networks", journal = j-TCBB, volume = "5", number = "2", pages = "291--300", month = apr, year = "2008", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:59:29 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The HIV-1 genome is highly heterogeneous. This variation affords the virus a wide range of molecular properties, including the ability to infect cell types, such as macrophages and lymphocytes, expressing different chemokine receptors on the cell surface. In particular, R5 HIV-1 viruses use CCR5 as co-receptor for viral entry, X4 viruses use CXCR4, whereas some viral strains, known as R5X4 or D-tropic, have the ability to utilize both co-receptors. X4 and R5X4 viruses are associated with rapid disease progression to AIDS. R5X4 viruses differ in that they have yet to be characterized by the examination of the genetic sequence of HIV-1 alone. In this study, a series of experiments was performed to evaluate different strategies of feature selection and neural network optimization. We demonstrate the use of artificial neural networks trained via evolutionary computation to predict viral co-receptor usage. 
The results indicate identification of R5X4 viruses with predictive accuracy of 75.5\%.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "AIDS; artificial neural networks; Computational intelligence; dual-tropic viruses; evolutionary computation; HIV; phenotype prediction; tropism", } @Article{vanIersel:2008:SIT, author = "Leo van Iersel and Judith Keijsper and Steven Kelk and Leen Stougie", title = "Shorelines of Islands of Tractability: Algorithms for Parsimony and Minimum Perfect Phylogeny Haplotyping Problems", journal = j-TCBB, volume = "5", number = "2", pages = "301--312", month = apr, year = "2008", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:59:29 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The problem Parsimony Haplotyping (PH) asks for the smallest set of haplotypes which can explain a given set of genotypes, and the problem Minimum Perfect Phylogeny Haplotyping (MPPH) asks for the smallest such set which also allows the haplotypes to be embedded in a perfect phylogeny, an evolutionary tree with biologically-motivated restrictions. For PH, we extend recent work by further mapping the interface between ``easy'' and ``hard'' instances, within the framework of $ (k, l) $-bounded instances where the number of 2's per column and row of the input matrix is restricted. By exploring, in the same way, the tractability frontier of MPPH we provide the first concrete, positive results for this problem. 
In addition, we construct for both PH and MPPH polynomial time approximation algorithms, based on properties of the columns of the input matrix.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "biology and genetics; combinatorial algorithms; complexity hierarchies", } @Article{Brinza:2008:SPM, author = "Dumitru Brinza and Alexander Zelikovsky", title = "{2SNP}: Scalable Phasing Method for Trios and Unrelated Individuals", journal = j-TCBB, volume = "5", number = "2", pages = "313--318", month = apr, year = "2008", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jun 12 16:59:29 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Emerging microarray technologies allow affordable typing of very long genome sequences. A key challenge in analyzing of such huge amount of data is scalable and accurate computational inferring of haplotypes (i.e., splitting of each genotype into a pair of corresponding haplotypes). In this paper, we first phase genotypes consisting only of two SNPs using genotypes frequencies adjusted to the random mating model and then extend phasing of two-SNP genotypes to phasing of complete genotypes using maximum spanning trees. Runtime of the proposed 2SNP algorithm is $ O(n m (n + \log m)) $, where $n$ and $m$ are the numbers of genotypes and SNPs, respectively, and it can handle genotypes spanning entire chromosomes in a matter of hours. On datasets across 23 chromosomal regions from HapMap[11], 2SNP is several orders of magnitude faster than GERBIL and PHASE while matching them in quality measured by the number of correctly phased genotypes, single-site and switching errors. 
For example the 2SNP software phases entire chromosome ($ 10^5 $ SNPs from HapMap) for 30 individuals in 2 hours with average switching error 7.7\%. We have also enhanced 2SNP algorithm to phase family trio data and compared it with four other well-known phasing methods on simulated data from [15]. 2SNP is much faster than all of them while losing in quality only to PHASE. 2SNP software is publicly available at \path=http://alla.cs.gsu.edu/~software/2SNP=.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "algorithm; genotype; haplotype; phasing; SNP", } @Article{Mandoiu:2008:GEI, author = "Ion I. Mandoiu and Yi Pan and Alexander Zelikovsky", title = "{Guest Editors}' Introduction to the Special Section on Bioinformatics Research and Applications", journal = j-TCBB, volume = "5", number = "3", pages = "321--322", month = jul, year = "2008", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.85", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Oct 10 12:59:44 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Sridhar:2008:MIL, author = "Srinath Sridhar and Fumei Lam and Guy E. Blelloch and R. 
Ravi and Russell Schwartz", title = "Mixed Integer Linear Programming for Maximum-Parsimony Phylogeny Inference", journal = j-TCBB, volume = "5", number = "3", pages = "323--331", month = jul, year = "2008", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.26", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Oct 10 12:59:44 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Reconstruction of phylogenetic trees is a fundamental problem in computational biology. While excellent heuristic methods are available for many variants of this problem, new advances in phylogeny inference will be required if we are to be able to continue to make effective use of the rapidly growing stores of variation data now being gathered. In this paper, we present two integer linear programming (ILP) formulations to find the most parsimonious phylogenetic tree from a set of binary variation data. One method uses a flow-based formulation that can produce exponential numbers of variables and constraints in the worst case. The method has, however, proven extremely efficient in practice on datasets that are well beyond the reach of the available provably efficient methods, solving several large mtDNA and Y-chromosome instances within a few seconds and giving provably optimal results in times competitive with fast heuristics that cannot guarantee optimality. An alternative formulation establishes that the problem can be solved with a polynomial-sized ILP. 
We further present a web server developed based on the exponential-sized ILP that performs fast maximum parsimony inferences and serves as a front end to a database of precomputed phylogenies spanning the human genome.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "algorithms; computational biology; integer linear programming; maximum parsimony; phylogenetic tree reconstruction; Steiner tree problem", } @Article{Bernt:2008:SPR, author = "Matthias Bernt and Daniel Merkle and Martin Middendorf", title = "Solving the Preserving Reversal Median Problem", journal = j-TCBB, volume = "5", number = "3", pages = "332--347", month = jul, year = "2008", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.39", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Oct 10 12:59:44 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Genomic rearrangement operations can be very useful to infer the phylogenetic relationship of gene orders representing species. We study the problem of finding potential ancestral gene orders for the gene orders of given taxa, such that the corresponding rearrangement scenario has a minimal number of reversals, and where each of the reversals has to preserve the common intervals of the given input gene orders. Common intervals identify sets of genes that occur consecutively in all input gene orders. The problem of finding such an ancestral gene order is called the preserving reversal median problem (pRMP). A tree-based data structure for the representation of the common intervals of all input gene orders is used in our exact algorithm TCIP for solving the pRMP. 
It is known that the minimum number of reversals to transform one gene order into another can be computed in polynomial time, whereas the corresponding problem with the restriction that common intervals should not be destroyed is already NP-hard. It is shown theoretically that TCIP can solve a large class of pRMP instances in polynomial time. Empirically we show the good performance of TCIP on biological and artificial data.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "biology and genetics; permutations and combinations", } @Article{Braga:2008:ESS, author = "Mar{\'\i}lia D. V. Braga and Marie-France Sagot and Celine Scornavacca and Eric Tannier", title = "Exploring the Solution Space of Sorting by Reversals, with Experiments and an Application to Evolution", journal = j-TCBB, volume = "5", number = "3", pages = "348--356", month = jul, year = "2008", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.16", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Oct 10 12:59:44 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In comparative genomics, algorithms that sort permutations by reversals are often used to propose evolutionary scenarios of rearrangements between species. One of the main problems of such methods is that they give one solution while the number of optimal solutions is huge, with no criteria to discriminate among them. Bergeron et al. started to give some structure to the set of optimal solutions, in order to be able to deliver more presentable results than only one solution or a complete list of all solutions. However, no algorithm exists so far to compute this structure except through the enumeration of all solutions, which takes too much time even for small permutations. Bergeron et al. 
state as an open problem the design of such an algorithm. We propose in this paper an answer to this problem, that is, an algorithm which gives all the classes of solutions and counts the number of solutions in each class, with a better theoretical and practical complexity than the complete enumeration method. We give an example of how to reduce the number of classes obtained, using further constraints. Finally, we apply our algorithm to analyse the possible scenarios of rearrangement between mammalian sex chromosomes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "common intervals; evolution; genome rearrangements; perfect sorting; sex chromosomes; signed permutations; sorting by reversals", } @Article{Vassura:2008:RSP, author = "Marco Vassura and Luciano Margara and Pietro {Di Lena} and Filippo Medri and Piero Fariselli and Rita Casadio", title = "Reconstruction of {$3$D} Structures From Protein Contact Maps", journal = j-TCBB, volume = "5", number = "3", pages = "357--367", month = jul, year = "2008", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.27", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Oct 10 12:59:44 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The prediction of the protein tertiary structure from solely its residue sequence (the so called Protein Folding Problem) is one of the most challenging problems in Structural Bioinformatics. We focus on the protein residue contact map. When this map is assigned it is possible to reconstruct the 3D structure of the protein backbone. The general problem of recovering a set of 3D coordinates consistent with some given contact map is known as a unit-disk-graph realization problem and it has been recently proven to be NP-Hard. 
In this paper we describe a heuristic method (COMAR) that is able to reconstruct with an unprecedented rate (3-15 seconds) a 3D model that exactly matches the target contact map of a protein. Working with a non-redundant set of 1760 proteins, we find that the scoring efficiency of finding a 3D model very close to the protein native structure depends on the threshold value adopted to compute the protein residue contact map. Contact maps whose threshold values range from 10 to 18 {\AA}ngstroms allow reconstructing 3D models that are very similar to the proteins native structure.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "combinatorial algorithms; contact map; molecular modeling; protein structure prediction", } @Article{Lee:2008:IEN, author = "George Lee and Carlos Rodriguez and Anant Madabhushi", title = "Investigating the Efficacy of Nonlinear Dimensionality Reduction Schemes in Classifying Gene and Protein Expression Studies", journal = j-TCBB, volume = "5", number = "3", pages = "368--384", month = jul, year = "2008", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.36", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Oct 10 12:59:44 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The recent explosion in procurement and availability of high-dimensional gene- and protein-expression profile datasets for cancer diagnostics has necessitated the development of sophisticated machine learning tools with which to analyze them. A major limitation in the ability to accurately classify these high-dimensional datasets stems from the `curse of dimensionality', occurring in situations where the number of genes or peptides significantly exceeds the total number of patient samples. 
Previous attempts at dealing with this issue have mostly centered on the use of a dimensionality reduction (DR) scheme, Principal Component Analysis (PCA), to obtain a low-dimensional projection of the high-dimensional data. However, linear PCA and other linear DR methods, which rely on Euclidean distances to estimate object similarity, do not account for the inherent underlying nonlinear structure associated with most biomedical data. The motivation behind this work is to identify the appropriate DR methods for analysis of high-dimensional gene- and protein-expression studies. Towards this end, we empirically and rigorously compare three nonlinear (Isomap, Locally Linear Embedding, Laplacian Eigenmaps) and three linear DR schemes (PCA, Linear Discriminant Analysis, Multidimensional Scaling) with the intent of determining a reduced subspace representation in which the individual object classes are more easily discriminable.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "and association rules; Bioinformatics (genome or protein) databases; classification; clustering; data and knowledge visualization; data mining; feature extraction or construction", } @Article{Cho:2008:CHC, author = "Hyuk Cho and Inderjit S. 
Dhillon", title = "Coclustering of Human Cancer Microarrays Using Minimum Sum-Squared Residue Coclustering", journal = j-TCBB, volume = "5", number = "3", pages = "385--400", month = jul, year = "2008", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2007.70268", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Oct 10 12:59:44 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "It is a consensus in microarray analysis that identifying potential local patterns, characterized by coherent groups of genes and conditions, may shed light on the discovery of previously undetectable biological cellular processes of genes as well as macroscopic phenotypes of related samples. In order to simultaneously cluster genes and conditions, we have previously developed a fast co-clustering algorithm, Minimum Sum-Squared Residue Co-clustering (MSSRCC), which employs an alternating minimization scheme and generates what we call co-clusters in a checkerboard structure. In this paper, we propose specific strategies that enable MSSRCC to escape poor local minima and resolve the degeneracy problem in partitional clustering algorithms. The strategies include binormalization, deterministic spectral initialization, and incremental local search. We assess the effects of various strategies on both synthetic gene expression datasets and real human cancer microarrays and provide empirical evidence that MSSRCC with the proposed strategies performs better than existing co-clustering and clustering algorithms. In particular, the combination of all the three strategies leads to the best performance. 
Furthermore, we illustrate coherence of the resulting co-clusters in a checkerboard structure, where genes in a co-cluster manifest the phenotype structure of corresponding specific samples, and evaluate the enrichment of functional annotations in Gene Ontology (GO).", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "binormalization; co-clustering; deterministic spectral initialization; gene ontology; local search; microarray analysis", } @Article{Wei:2008:IGF, author = "Peng Wei and Wei Pan", title = "Incorporating Gene Functions into Regression Analysis of {DNA}-Protein Binding Data and Gene Expression Data to Construct Transcriptional Networks", journal = j-TCBB, volume = "5", number = "3", pages = "401--415", month = jul, year = "2008", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2007.1062", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Oct 10 12:59:44 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Useful information on transcriptional networks has been extracted by regression analyses of gene expression data and DNA-protein binding data. However, a potential limitation of these approaches is their assumption on the common and constant activity level of a transcription factor (TF) on all the genes in any given experimental condition; for example, any TF is assumed to be either an activator or a repressor, but not both, while it is known that some TFs can be dual regulators. Rather than assuming a common linear regression model for all the genes, we propose using separate regression models for various gene groups; the genes can be grouped based on their functions or some clustering results. 
Furthermore, to take advantage of the hierarchical structure of many existing gene function annotation systems, such as Gene Ontology (GO), we propose a shrinkage method that borrows information from relevant gene groups. Applications to a yeast dataset and simulations lend support for our proposed methods. In particular, we find that the shrinkage method consistently works well under various scenarios. We recommend the use of the shrinkage method as a useful alternative to the existing methods.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "LASSO; microarray; shrinkage estimator; stratified analysis; transcription factor", } @Article{Mak:2008:PPS, author = "Man-Wai Mak and Jian Guo and Sun-Yuan Kung", title = "{PairProSVM}: Protein Subcellular Localization Based on Local Pairwise Profile Alignment and {SVM}", journal = j-TCBB, volume = "5", number = "3", pages = "416--422", month = jul, year = "2008", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2007.70256", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Oct 10 12:59:44 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The subcellular locations of proteins are important functional annotations. An effective and reliable subcellular localization method is necessary for proteomics research. This paper introduces a new method---PairProSVM---to automatically predict the subcellular locations of proteins. The profiles of all protein sequences in the training set are constructed by PSI-BLAST and the pairwise profile-alignment scores are used to form feature vectors for training a support vector machine (SVM) classifier. 
It was found that PairProSVM outperforms the methods that are based on sequence alignment and amino-acid compositions even if most of the homologous sequences have been removed. This paper also demonstrates that the performance of PairProSVM is sensitive (and somewhat proportional) to the degree of its kernel matrix meeting the Mercer's condition. PairProSVM was evaluated on Reinhardt and Hubbard's, Huang and Li's, and Gardy et al.'s protein datasets. The overall accuracies on these three datasets reach 99.3\%, 76.5\%, and 91.9\%, respectively, which are higher than or comparable to those obtained by sequence alignment and by the methods compared in this paper.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "kernel methods; Mercer condition; profile alignment; subcellular localization; support vector machines", } @Article{Elo:2008:ROT, author = "Laura L. Elo and Sanna Filen and Riitta Lahesmaa and Tero Aittokallio", title = "Reproducibility-Optimized Test Statistic for Ranking Genes in Microarray Studies", journal = j-TCBB, volume = "5", number = "3", pages = "423--431", month = jul, year = "2008", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/tcbb.2007.1078", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Oct 10 12:59:44 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A principal goal of microarray studies is to identify the genes showing differential expression under distinct conditions. In such studies, the selection of an optimal test statistic is a crucial challenge, which depends on the type and amount of data under analysis. 
While previous studies on simulated or spike-in datasets do not provide practical guidance on how to choose the best method for a given real dataset, we introduce an enhanced reproducibility-optimization procedure, which enables the selection of a suitable gene-ranking statistic directly from the data. In comparison with existing ranking methods, the reproducibility-optimized statistic shows good performance consistently under various simulated conditions and on Affymetrix spike-in dataset. Further, the feasibility of the novel statistic is confirmed in a practical research setting using data from an in-house cDNA microarray study of asthma-related gene expression changes. These results suggest that the procedure facilitates the selection of an appropriate test statistic for a given dataset without relying on a priori assumptions, which may bias the findings and their interpretation. Moreover, the general reproducibility-optimization procedure is not limited to detecting differential expression only but could be extended to a wide range of other applications as well.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "bootstrap; differential expression; gene expression; gene ranking; microarray; reproducibility", } @Article{Parker:2008:SPT, author = "Douglass Stott Parker and Ruey-Lung Hsiao and Yi Xing and Alissa M. Resch and Christopher J. 
Lee", title = "Solving the Problem of Trans-Genomic Query with Alignment Tables", journal = j-TCBB, volume = "5", number = "3", pages = "432--447", month = jul, year = "2008", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2007.1073", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Oct 10 12:59:44 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The trans-genomic query (TGQ) problem -- enabling the free query of biological information, even across genomes -- is a central challenge facing bioinformatics. Solutions to this problem can alter the nature of the field, moving it beyond the jungle of data integration and expanding the number and scope of questions that can be answered. An alignment table is a binary relationship on locations (sequence segments). An important special case of alignment tables are hit tables --- tables of pairs of highly similar segments produced by alignment tools like BLAST. However, alignment tables also include general binary relationships, and can represent any useful connection between sequence locations. They can be curated, and provide a high-quality queryable backbone of connections between biological information. Alignment tables thus can be a natural foundation for TGQ, as they permit a central part of the TGQ problem to be reduced to purely technical problems involving tables of locations. Key challenges in implementing alignment tables include efficient representation and indexing of sequence locations. We define a location datatype that can be incorporated naturally into common off-the-shelf database systems. We also describe an implementation of alignment tables in BLASTGRES, an extension of the open-source POSTGRESQL database system that provides indexing and operators on locations required for querying alignment tables. 
This paper also reviews several successful large-scale applications of alignment tables for Trans-Genomic Query. Tables with millions of alignments have been used in queries about alternative splicing, an area of genomic analysis concerning the way in which a single gene can yield multiple transcripts. Comparative genomics is a large potential application area for TGQ and alignment tables.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Dawy:2008:FSG, author = "Zaher Dawy and Michel Sarkis and Joachim Hagenauer and Jakob C. Mueller", title = "Fine-Scale Genetic Mapping Using Independent Component Analysis", journal = j-TCBB, volume = "5", number = "3", pages = "448--460", month = jul, year = "2008", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2007.1072", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Oct 10 12:59:44 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The aim of genetic mapping is to locate the loci responsible for specific traits such as complex diseases. These traits are normally caused by mutations at multiple loci of unknown locations and interactions. In this work, we model the biological system that relates DNA polymorphisms with complex traits as a linear mixing process. Given this model, we propose a new fine-scale genetic mapping method based on independent component analysis. The proposed method outputs both independent associated groups of SNPs in addition to specific associated SNPs with the phenotype. It is applied to a clinical data set for the Schizophrenia disease with 368 individuals and 42 SNPs. It is also applied to a simulation study to investigate in more depth its performance. 
The obtained results demonstrate the novel characteristics of the proposed method compared to other genetic mapping methods. Finally, we study the robustness of the proposed method with missing genotype values and limited sample sizes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "association mapping; complex diseases; independent component analysis (ICA); linkage disequilibrium; principal component analysis (PCA); single nucleotide polymorphisms (SNPs)", } @Article{Hendy:2008:HCK, author = "Michael D. Hendy and Sagi Snir", title = "{Hadamard} Conjugation for the {Kimura} {3ST} Model: Combinatorial Proof Using Path Sets", journal = j-TCBB, volume = "5", number = "3", pages = "461--471", month = jul, year = "2008", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2007.70227", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Oct 10 12:59:44 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Under a stochastic model of molecular sequence evolution the probability of each possible pattern of a characters is well defined. The Kimura's three-substitution-types (K3ST) model of evolution, allows analytical expression for these probabilities of by means of the Hadamard conjugation as a function of the phylogeny T and the substitution probabilities on each edge of TM. In this paper we produce a direct combinatorial proof of these results, using pathset distances which generalise pairwise distances between sequences. 
This interpretation provides us with tools that were proved useful in related problems in the mathematical analysis of sequence evolution.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "Hadamard conjugation; K3ST model; path-sets; phylogenetic invariants; phylogenetic trees", } @Article{Gambette:2008:ILP, author = "Philippe Gambette and Daniel H. Huson", title = "Improved Layout of Phylogenetic Networks", journal = j-TCBB, volume = "5", number = "3", pages = "472--479", month = jul, year = "2008", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/tcbb.2007.1046", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Oct 10 12:59:44 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Split networks are increasingly being used in phylogenetic analysis. Usually, a simple equal angle algorithm is used to draw such networks, producing layouts that leave much room for improvement. 
Addressing the problem of producing better layouts of split networks, this paper presents an algorithm for maximizing the area covered by the network, describes an extension of the equal-daylight algorithm to networks, looks into using a spring embedder and discusses how to construct rooted split networks.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "algorithms; graph drawing; phylogenetic networks; phylogenetics", } @Article{Gusfield:2008:EE, author = "Dan Gusfield", title = "{EIC} Editorial", journal = j-TCBB, volume = "5", number = "4", pages = "481--481", month = oct, year = "2008", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.115", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jan 14 12:51:33 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Giancarlo:2008:GEI, author = "Raffaele Giancarlo and Sridhar Hannenhalli", title = "{Guest Editors}' Introduction to the Special Section on Algorithms in Bioinformatics", journal = j-TCBB, volume = "5", number = "4", pages = "482--483", month = oct, year = "2008", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.116", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jan 14 12:51:33 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Jeong:2008:ISP, author = "Jieun Jeong and Piotr Berman and Teresa M. 
Przytycka", title = "Improving Strand Pairing Prediction through Exploring Folding Cooperativity", journal = j-TCBB, volume = "5", number = "4", pages = "484--491", month = oct, year = "2008", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.88", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jan 14 12:51:33 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The topology of $ \beta $-sheets is defined by the pattern of hydrogen-bonded strand pairing. Therefore, predicting hydrogen bonded strand partners is a fundamental step towards predicting $ \beta $-sheet topology. At the same time, finding the correct partners is very difficult due to long range interactions involved in strand pairing. Additionally, patterns of aminoacids involved, in $ \beta $-sheet formations are very general and therefore difficult to use for computational recognition of specific contacts between strands. In this work, we report a new strand pairing algorithm. To address above mentioned difficulties, our algorithm attempts to mimic elements of the folding process. Namely, in addition to ensuring that the predicted hydrogen bonded strand pairs satisfy basic global consistency constraints, it takes into account hypothetical folding pathways. Consistently with this view, introducing hydrogen bonds between a pair of strands changes the probabilities of forming hydrogen bonds between other pairs of strand. We demonstrate that this approach provides an improvement over previously proposed algorithms. 
We also compare the performance of this method to that of a global optimization algorithm that poses the problem as integer linear programming optimization problem and solves it using ILOG CPLEX\TM{} package.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "Biology and genetics; Combinatorial algorithms", } @Article{Genovese:2008:SAH, author = "Loredana M. Genovese and Filippo Geraci and Marco Pellegrini", title = "{SpeedHap}: An Accurate Heuristic for the Single Individual {SNP} Haplotyping Problem with Many Gaps, High Reading Error Rate and Low Coverage", journal = j-TCBB, volume = "5", number = "4", pages = "492--502", month = oct, year = "2008", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.67", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jan 14 12:51:33 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Single nucleotide polymorphism (SNP) is the most frequent form of DNA variation. The set of SNP's present in a chromosome (called the {\em haplotype}) is of interest in a wide area of applications in molecular biology and biomedicine, including diagnostic and medical therapy. In this paper we propose a new heuristic method for the problem of haplotype reconstruction for (portions of) a pair of homologous human chromosomes from a single individual (SIH). The problem is well known in literature and exact algorithms have been proposed for the case when no (or few) gaps are allowed in the input fragments. These algorithms, though exact and of polynomial complexity, are slow in practice. When gaps are considered no exact method of polynomial complexity is known. The problem is also hard to approximate with guarantees. Therefore fast heuristics have been proposed. 
In this paper we describe SpeedHap, a new heuristic method that is able to tackle the case of many gapped fragments and retains its effectiveness even when the input fragments have high rate of reading errors (up to 20\%) and low coverage (as low as 3). We test SpeedHap on real data from the HapMap Project.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "Algorithms; Biology and genetics", } @Article{Lozano:2008:STA, author = "Antoni Lozano and Ron Y. Pinter and Oleg Rokhlenko and Gabriel Valiente and Michal Ziv-Ukelson", title = "Seeded Tree Alignment", journal = j-TCBB, volume = "5", number = "4", pages = "503--513", month = oct, year = "2008", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.59", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jan 14 12:51:33 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The optimal transformation of one tree into another by means of elementary edit operations is an important algorithmic problem that has several interesting applications to computational biology. Here we introduce a constrained form of this problem in which a partial mapping of a set of nodes (the `seeds') in one tree to a corresponding set of nodes in the other tree is given, and present efficient algorithms for both ordered and unordered trees. Whereas ordered tree matching based on seeded nodes has applications in pattern matching of RNA structures, unordered tree matching based on seeded nodes has applications in co-speciation and phylogeny reconciliation. 
The latter involves the solution of the planar tanglegram layout problem, for which a polynomial-time algorithm is given here.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "Biology and genetics; Computer Applications; Discrete Mathematics; Graph algorithms; Graph Theory; Life and Medical Sciences; Mathematics of Computing; Trees", } @Article{Bansal:2008:STH, author = "Mukul S. Bansal and Oliver Eulenstein", title = "An {$ \Omega (n^2 / \log n) $} Speed-Up of {TBR} Heuristics for the Gene-Duplication Problem", journal = j-TCBB, volume = "5", number = "4", pages = "514--524", month = oct, year = "2008", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.69", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jan 14 12:51:33 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The gene-duplication problem is to infer a species supertree from gene trees that are confounded by complex histories of gene duplications. This problem is NP-hard and thus requires efficient and effective heuristics. Existing heuristics perform a stepwise search of the tree space, where each step is guided by an exact solution to an instance of a local search problem. We improve on the time complexity of the local search problem by a factor of $ n^2 / \log n $, where $n$ is the size of the resulting species supertree. Typically, several thousand instances of the local search problem are solved throughout a stepwise heuristic search. 
Hence, our improvement makes the gene-duplication problem much more tractable for large-scale phylogenetic analyses.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "Algorithms; Computational Biology; Gene Duplication; Phylogenetics; Supertrees", } @Article{Wang:2008:DCO, author = "Xueyi Wang and Jack Snoeyink", title = "Defining and Computing Optimum {RMSD} for Gapped and Weighted Multiple-Structure Alignment", journal = j-TCBB, volume = "5", number = "4", pages = "525--533", month = oct, year = "2008", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.92", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jan 14 12:51:33 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Pairwise structure alignment commonly uses root mean square deviation (RMSD) to measure the structural similarity, and methods for optimizing RMSD are well established. We extend RMSD to weighted RMSD for multiple structures. By using multiplicative weights, we show that weighted RMSD for all pairs is the same as weighted RMSD to an average of the structures. Thus, using RMSD or weighted RMSD implies that the average is a consensus structure. Although we show that in general, the two tasks of finding the optimal translations and rotations for minimizing weighted RMSD cannot be separated for multiple structures like they can for pairs, an inherent difficulty and a fact ignored by previous work, we develop a near-linear iterative algorithm to converge weighted RMSD to a local minimum. 10,000 experiments of gapped alignment done on each of 23 protein families from HOMSTRAD (where each structure starts with a random translation and rotation) converge rapidly to the same minimum. 
Finally we propose a heuristic method to iteratively remove the effect of outliers and find well-aligned positions that determine the structural conserved region by modeling B-factors and deviations from the average positions as weights and iteratively assigning higher weights to better aligned atoms.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "multiple structure alignment; optimization methods; structural conserved region; weighted RMSD", } @Article{Yao:2008:EAE, author = "Peggy Yao and Ankur Dhanik and Nathan Marz and Ryan Propper and Charles Kou and Guanfeng Liu and Henry van den Bedem and Jean-Claude Latombe and Inbal Halperin-Landsberg and Russ B. Altman", title = "Efficient Algorithms to Explore Conformation Spaces of Flexible Protein Loops", journal = j-TCBB, volume = "5", number = "4", pages = "534--545", month = oct, year = "2008", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.96", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jan 14 12:51:33 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Several applications in biology --- e.g., incorporation of protein flexibility in ligand docking algorithms, interpretation of fuzzy X-ray crystallographic data, and homology modeling --- require computing the internal parameters of a flexible fragment (usually, a loop) of a protein in order to connect its termini to the rest of the protein without causing any steric clash. One must often sample many such conformations in order to explore and adequately represent the conformational range of the studied loop. While sampling must be fast, it is made difficult by the fact that two conflicting constraints --- kinematic closure and clash avoidance --- must be satisfied concurrently. 
This paper describes two efficient and complementary sampling algorithms to explore the space of closed clash-free conformations of a flexible protein loop. The `seed sampling' algorithm samples broadly from this space, while the `deformation sampling' algorithm uses seed conformations as starting points to explore the conformation space around them at a finer grain. Computational results are presented for various loops ranging from 5 to 25 residues. More specific results also show that the combination of the sampling algorithms with a functional site prediction software (FEATURE) makes it possible to compute and recognize calcium-binding loop conformations. The sampling algorithms are implemented in a toolkit (LoopTK), which is available at \path=https://simtk.org/home/looptk=.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "Biology and genetics; Robotics", } @Article{Kim:2008:LSS, author = "Eagu Kim and John Kececioglu", title = "Learning Scoring Schemes for Sequence Alignment from Partial Examples", journal = j-TCBB, volume = "5", number = "4", pages = "546--556", month = oct, year = "2008", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.57", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jan 14 12:51:33 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "When aligning biological sequences, the choice of parameter values for the alignment scoring function is critical. Small changes in gap penalties, for example, can yield radically different alignments. A rigorous way to compute parameter values that are appropriate for aligning biological sequences is through inverse parametric sequence alignment. 
Given a collection of examples of biologically correct alignments, this is the problem of finding parameter values that make the scores of the example alignments close to those of optimal alignments for their sequences. We extend prior work on inverse parametric alignment to partial examples, which contain regions where the alignment is left unspecified, and to an improved formulation based on minimizing the average error between the score of an example and the score of an optimal alignment. Experiments on benchmark biological alignments show we can find parameters that generalize across protein families and that boost the accuracy of multiple sequence alignment by as much as 25\%.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "Analysis of Algorithms and Problem Complexity; Biology and genetics; Linear programming; Pattern matching", } @Article{Schliep:2008:EAC, author = "Alexander Schliep and Roland Krause", title = "Efficient Algorithms for the Computational Design of Optimal Tiling Arrays", journal = j-TCBB, volume = "5", number = "4", pages = "557--567", month = oct, year = "2008", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.50", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jan 14 12:51:33 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The representation of a genome by oligonucleotide probes is a prerequisite for the analysis of many of its basic properties, such as transcription factor binding sites, chromosomal breakpoints, gene expression of known genes and detection of novel genes, in particular those coding for small RNAs. 
An ideal representation would consist of a high density set of oligonucleotides with similar melting temperatures that do not cross-hybridize with other regions of the genome and are equidistantly spaced. The implementation of such design is typically called a tiling array or genome array. We formulate the minimal cost tiling path problem for the selection of oligonucleotides from a set of candidates. Computing the selection of probes requires multi-criterion optimization, which we cast into a shortest path problem. Standard algorithms running in linear time allow us to compute globally optimal tiling paths from millions of candidate oligonucleotides on a standard desktop computer for most problem variants. The solutions to this multi-criterion optimization are spatially adaptive to the problem instance. Our formulation incorporates experimental constraints with respect to specific regions of interest and trade offs between hybridization parameters, probe quality and tiling density easily.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "Biology and genetics; Graph Theory", } @Article{Yu:2008:CAA, author = "Zeyun Yu and Chandrajit Bajaj", title = "Computational Approaches for Automatic Structural Analysis of Large Biomolecular Complexes", journal = j-TCBB, volume = "5", number = "4", pages = "568--582", month = oct, year = "2008", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2007.70226", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jan 14 12:51:33 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We present computational solutions to two problems of macromolecular structure interpretation from reconstructed three-dimensional electron microscopy (3D-EM) maps of large bio-molecular complexes at intermediate resolution 
(5A-15A). The two problems addressed are: (a) 3D structural alignment (matching) between identified and segmented 3D maps of structure units (e.g. trimeric configuration of proteins), and (b) the secondary structure identification of a segmented protein 3D map (i.e., locations of $ \alpha $-helices, $ \beta $-sheets). For problem (a), we present an efficient algorithm to correlate spatially (and structurally) two 3D maps of structure units. Besides providing a similarity score between structure units, the algorithm yields an effective technique for resolution refinement of repeated structure units, by 3D alignment and averaging. For problem (b), we present an efficient algorithm to compute eigenvalues and link eigenvectors of a Gaussian convoluted structure tensor derived from the protein 3D Map, thereby identifying and locating secondary structural motifs of proteins. The efficiency and performance of our approach is demonstrated on several experimentally reconstructed 3D maps of virus capsid shells from single-particle cryo-EM, as well as computationally simulated protein structure density 3D maps generated from protein model entries in the Protein Data Bank.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "3D Reconstruction; Alignment; Cryo-EM Maps; Secondary Structure Detection; Segmentation; Similarity Measure; Skeletonization; Structure Analysis", } @Article{Christinat:2008:GED, author = "Yann Christinat and Bernd Wachmann and Lei Zhang", title = "Gene Expression Data Analysis Using a Novel Approach to Biclustering Combining Discrete and Continuous Data", journal = j-TCBB, volume = "5", number = "4", pages = "583--593", month = oct, year = "2008", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2007.70251", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jan 14 12:51:33 MST 2009",
bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Many different methods exist for pattern detection in gene expression data. In contrast to classical methods, biclustering has the ability to cluster a group of genes together with a group of conditions (replicates, set of patients or drug compounds). However, since the problem is NP-complex, most algorithms use heuristic search functions and therefore might converge towards local maxima. By using the results of biclustering on discrete data as a starting point for a local search function on continuous data, our algorithm avoids the problem of heuristic initialization. Similar to OPSM, our algorithm aims to detect biclusters whose rows and columns can be ordered such that row values are growing across the bicluster's columns and vice-versa. Results have been generated on the yeast genome (Saccharomyces cerevisiae), a human cancer dataset and random data. Results on the yeast genome showed that 89\% of the one hundred biggest non-overlapping biclusters were enriched with Gene Ontology annotations. A comparison with OPSM and ISA demonstrated a better efficiency when using gene and condition orders. 
We present results on random and real datasets that show the ability of our algorithm to capture statistically significant and biologically relevant biclusters.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "Bioinformatics (genome or protein) databases; Data and knowledge visualization; Data mining; Graph and tree search strategies; Machine learning", } @Article{Lacroix:2008:IMN, author = "Vincent Lacroix and Ludovic Cottret and Patricia Th{\'e}bault and Marie-France Sagot", title = "An Introduction to Metabolic Networks and Their Structural Analysis", journal = j-TCBB, volume = "5", number = "4", pages = "594--617", month = oct, year = "2008", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.79", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jan 14 12:51:33 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "There has been a renewed interest for metabolism in the computational biology community, leading to an avalanche of papers coming from methodological network analysis as well as experimental and theoretical biology. This paper is meant to serve as an initial guide for both the biologists interested in formal approaches and the mathematicians or computer scientists wishing to inject more realism into their models. The paper is focused on the structural aspects of metabolism only. The literature is vast enough already, and the thread through it difficult to follow even for the more experienced worker in the field. We explain methods for acquiring data and reconstructing metabolic networks, and review the various models that have been used for their structural analysis. 
Several concepts such as modularity are introduced, as are the controversies that have beset the field these past few years, for instance, on whether metabolic networks are small-world or scale-free, and on which model better explains the evolution of metabolism. Clarifying the work that has been done also helps in identifying open questions and in proposing relevant future directions in the field, which we do along the paper and in the conclusion.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "Biology and genetics; evolution; Graph Theory; Introductory and Survey; metabolic networks; modelling; modularity; reconstruction", } @Article{Satya:2008:UIP, author = "Ravi Vijaya Satya and Amar Mukherjee", title = "The Undirected Incomplete Perfect Phylogeny Problem", journal = j-TCBB, volume = "5", number = "4", pages = "618--629", month = oct, year = "2008", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2007.70218", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jan 14 12:51:33 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The incomplete perfect phylogeny (IPP) problem and the incomplete perfect phylogeny haplotyping (IPPH) problem deal with constructing a phylogeny for a given set of haplotypes or genotypes with missing entries. The earlier approaches for both of these problems dealt with restricted versions of the problems, where the root is either available or can be trivially re-constructed from the data, or certain assumptions were made about the data. In this paper, we deal with the unrestricted versions of the problems, where the root of the phylogeny is neither available nor trivially recoverable from the data. Both IPP and IPPH problems have previously been proven to be NP complete. 
Here, we present efficient enumerative algorithms that can handle practical instances of the problem. Empirical analysis on simulated data shows that the algorithms perform very well both in terms of speed and in terms accuracy of the recovered data.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "Haplotype Inference; Incomplete Perfect Phylogeny; Perfect Phylogeny; Phylogenetics", } @Article{Gondro:2008:OCM, author = "Cedric Gondro and Brian P. Kinghorn", title = "Optimization of {cDNA} Microarray Experimental Designs Using an Evolutionary Algorithm", journal = j-TCBB, volume = "5", number = "4", pages = "630--638", month = oct, year = "2008", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2007.70222", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jan 14 12:51:33 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The cDNA microarray is an important tool for generating large datasets of gene expression measurements. An efficient design is critical to ensure that the experiment will be able to address relevant biological questions. Microarray experimental design can be treated as a multicriterion optimization problem. For this class of problems evolutionary algorithms (EAs) are well suited, as they can search the solution space and evolve a design that optimizes the parameters of interest based on their relative value to the researcher under a given set of constraints. This paper introduces the use of EAs for optimization of experimental designs of spotted microarrays using a weighted objective function. The EA and the various criteria relevant to design optimization are discussed. 
Evolved designs are compared with designs obtained through exhaustive search with results suggesting that the EA can find just as efficient optimal or near-optimal designs within a tractable timeframe.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "Evolutionary computing and genetic algorithms; experimental design; global optimization; microarrays", } @Article{Gusfield:2009:FFY, author = "Dan Gusfield", title = "Final, Five-Year End, Editorial", journal = j-TCBB, volume = "6", number = "1", pages = "1--2", month = jan, year = "2009", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 2 18:46:49 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Sagot:2009:NEE, author = "Marie-France Sagot", title = "New {EIC} Editorial", journal = j-TCBB, volume = "6", number = "1", pages = "3--3", month = jan, year = "2009", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 2 18:46:49 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Huson:2009:SSP, author = "Daniel H. 
Huson and Vincent Moulton and Mike Steel", title = "Special Section: Phylogenetics", journal = j-TCBB, volume = "6", number = "1", pages = "4--6", month = jan, year = "2009", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 2 18:46:49 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Liu:2009:BWT, author = "Kevin Liu and Serita Nelesen and Sindhu Raghavan and C. Randal Linder and Tandy Warnow", title = "Barking Up The Wrong Treelength: The Impact of Gap Penalty on Alignment and Tree Accuracy", journal = j-TCBB, volume = "6", number = "1", pages = "7--21", month = jan, year = "2009", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 2 18:46:49 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Minh:2009:BPD, author = "Bui Quang Minh and Fabio Pardi and Steffen Klaere and Arndt von Haeseler", title = "Budgeted Phylogenetic Diversity on Circular Split Systems", journal = j-TCBB, volume = "6", number = "1", pages = "22--29", month = jan, year = "2009", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 2 18:46:49 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Linz:2009:HNT, author = "Simone Linz and Charles Semple", 
title = "Hybridization in Nonbinary Trees", journal = j-TCBB, volume = "6", number = "1", pages = "30--45", month = jan, year = "2009", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 2 18:46:49 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Cardona:2009:MPNa, author = "Gabriel Cardona and Merc{\`e} Llabr{\'e}s and Francesc Rossell{\'o} and Gabriel Valiente", title = "Metrics for Phylogenetic Networks {I}: Generalizations of the {Robinson--Foulds} Metric", journal = j-TCBB, volume = "6", number = "1", pages = "46--61", month = jan, year = "2009", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 2 18:46:49 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Willson:2009:RTS, author = "Stephen J. Willson", title = "Robustness of Topological Supertree Methods for Reconciling Dense Incompatible Data", journal = j-TCBB, volume = "6", number = "1", pages = "62--75", month = jan, year = "2009", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 2 18:46:49 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Allman:2009:ICM, author = "Elizabeth S. Allman and John A. 
Rhodes", title = "The Identifiability of Covarion Models in Phylogenetics", journal = j-TCBB, volume = "6", number = "1", pages = "76--88", month = jan, year = "2009", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 2 18:46:49 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Matsen:2009:FTI, author = "Frederick A. Matsen", title = "{Fourier} Transform Inequalities for Phylogenetic Trees", journal = j-TCBB, volume = "6", number = "1", pages = "89--95", month = jan, year = "2009", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 2 18:46:49 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Gusfield:2009:OEE, author = "Dan Gusfield", title = "Outgoing {EIC} Editorial for this Special Section of {TCBB} with the Theme of Phylogenetics", journal = j-TCBB, volume = "6", number = "1", pages = "96--96", month = jan, year = "2009", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 2 18:46:49 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Grunewald:2009:MPT, author = "Stefan Gr{\"u}newald and Vincent Moulton", title = "Maximum Parsimony for Tree Mixtures", journal = j-TCBB, volume = "6", number = "1", pages = "97--102", 
month = jan, year = "2009", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 2 18:46:49 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Huson:2009:DRP, author = "Daniel H. Huson", title = "Drawing Rooted Phylogenetic Networks", journal = j-TCBB, volume = "6", number = "1", pages = "103--109", month = jan, year = "2009", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 2 18:46:49 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Bordewich:2009:CTM, author = "Magnus Bordewich and Olivier Gascuel and Katharina T. 
Huber and Vincent Moulton", title = "Consistency of Topological Moves Based on the Balanced Minimum Evolution Principle of Phylogenetic Inference", journal = j-TCBB, volume = "6", number = "1", pages = "110--117", month = jan, year = "2009", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 2 18:46:49 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wu:2009:RPT, author = "Taoyang Wu and Vincent Moulton and Mike Steel", title = "Refining Phylogenetic Trees Given Additional Data: An Algorithm Based on Parsimony", journal = j-TCBB, volume = "6", number = "1", pages = "118--125", month = jan, year = "2009", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 2 18:46:49 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Mossel:2009:SEA, author = "Elchanan Mossel and Sebastien Roch and Mike Steel", title = "Shrinkage Effect in Ancestral Maximum Likelihood", journal = j-TCBB, volume = "6", number = "1", pages = "126--133", month = jan, year = "2009", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 2 18:46:49 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ma:2009:GCU, author = "Jianmin Ma and Minh N. 
Nguyen and Jagath C. Rajapakse", title = "Gene Classification Using Codon Usage and Support Vector Machines", journal = j-TCBB, volume = "6", number = "1", pages = "134--143", month = jan, year = "2009", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 2 18:46:49 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Maitra:2009:IPO, author = "Ranjan Maitra", title = "Initializing Partition-Optimization Algorithms", journal = j-TCBB, volume = "6", number = "1", pages = "144--157", month = jan, year = "2009", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 2 18:46:49 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Narasimhan:2009:SPG, author = "Sridharakumar Narasimhan and Raghunathan Rengaswamy and Rajanikanth Vadigepalli", title = "Structural Properties of Gene Regulatory Networks: Definitions and Connections", journal = j-TCBB, volume = "6", number = "1", pages = "158--170", month = jan, year = "2009", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 2 18:46:49 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Anonymous:2009:RL, author = "Anonymous", title = "2008 Reviewers List", journal = j-TCBB, volume = "6", 
number = "1", pages = "171--173", month = jan, year = "2009", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 2 18:46:49 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Sagot:2009:EE, author = "Marie-France Sagot", title = "{EIC} Editorial", journal = j-TCBB, volume = "6", number = "2", pages = "177--177", month = apr, year = "2009", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.44", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 1 17:03:22 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Mandoiu:2009:GEI, author = "Ion Mandoiu and Yi Pan and Raj Sunderraman and Alexander Zelikovsky", title = "{Guest Editors}' Introduction to the Special Section on Bioinformatics Research and Applications", journal = j-TCBB, volume = "6", number = "2", pages = "178--179", month = apr, year = "2009", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.45", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 1 17:03:22 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Treangen:2009:NHL, author = "Todd J. Treangen and Aaron E. Darling and Guillaume Achaz and Mark A. Ragan and Xavier Messeguer and Eduardo P. C. 
Rocha", title = "A Novel Heuristic for Local Multiple Alignment of Interspersed {DNA} Repeats", journal = j-TCBB, volume = "6", number = "2", pages = "180--189", month = apr, year = "2009", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.9", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 1 17:03:22 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Pairwise local sequence alignment methods have been the prevailing technique to identify homologous nucleotides between related species. However, existing methods that identify and align all homologous nucleotides in one or more genomes have suffered from poor scalability and limited accuracy. We propose a novel method that couples a gapped extension heuristic with an efficient filtration method for identifying interspersed repeats in genome sequences. During gapped extension, we use the MUSCLE implementation of progressive global multiple alignment with iterative refinement. The resulting gapped extensions potentially contain alignments of unrelated sequence. We detect and remove such undesirable alignments using a hidden Markov model (HMM) to predict the posterior probability of homology. The HMM emission frequencies for nucleotide substitutions can be derived from any time-reversible nucleotide substitution matrix. We evaluate the performance of our method and previous approaches on a hybrid data set of real genomic DNA with simulated interspersed repeats. Our method outperforms a related method in terms of sensitivity, positive predictive value, and localizing boundaries of homology. 
The described methods have been implemented in freely available software, Repeatoire, available from: \path=http://wwwabi.snv.jussieu.fr/public/Repeatoire=.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "DNA repeats; gapped extension; genome comparison; hidden Markov model; local multiple alignment; Sequence alignment", } @Article{Qiu:2009:FMK, author = "Shibin Qiu and Terran Lane", title = "A Framework for Multiple Kernel Support Vector Regression and Its Applications to {siRNA} Efficacy Prediction", journal = j-TCBB, volume = "6", number = "2", pages = "190--199", month = apr, year = "2009", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.139", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 1 17:03:22 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The cell defense mechanism of RNA interference has applications in gene function analysis and promising potentials in human disease therapy. To effectively silence a target gene, it is desirable to select appropriate initiator siRNA molecules having satisfactory silencing capabilities. Computational prediction for silencing efficacy of siRNAs can assist this screening process before using them in biological experiments. String kernel functions, which operate directly on the string objects representing siRNAs and target mRNAs, have been applied to support vector regression for the prediction and improved accuracy over numerical kernels in multidimensional vector spaces constructed from descriptors of siRNA design rules. To fully utilize information provided by string and numerical data, we propose to unify the two in a kernel feature space by devising a multiple kernel regression framework where a linear combination of the kernels is used.
We formulate the multiple kernel learning into a quadratically constrained quadratic programming (QCQP) problem, which although yields global optimal solution, is computationally demanding and requires a commercial solver package. We further propose three heuristics based on the principle of kernel-target alignment and predictive accuracy. Empirical results demonstrate that multiple kernel regression can improve accuracy, decrease model complexity by reducing the number of support vectors, and speed up computational performance dramatically. In addition, multiple kernel regression evaluates the importance of constituent kernels, which for the siRNA efficacy prediction problem, compares the relative significance of the design rules. Finally, we give insights into the multiple kernel regression mechanism and point out possible extensions.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "multiple kernel heuristics; Multiple kernel learning; QCQP optimization; RNA interference; siRNA efficacy; support vector regression", } @Article{Park:2009:NBI, author = "Yongjin Park and Stanley Shackney and Russell Schwartz", title = "Network-Based Inference of Cancer Progression from Microarray Data", journal = j-TCBB, volume = "6", number = "2", pages = "200--212", month = apr, year = "2009", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.126", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 1 17:03:22 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Cancer cells exhibit a common phenotype of uncontrolled cell growth, but this phenotype may arise from many different combinations of mutations.
By inferring how cells evolve in individual tumors, a process called cancer progression, we may be able to identify important mutational events for different tumor types, potentially leading to new therapeutics and diagnostics. Prior work has shown that it is possible to infer frequent progression pathways by using gene expression profiles to estimate ``distances'' between tumors. Here, we apply gene network models to improve these estimates of evolutionary distance by controlling for correlations among coregulated genes. We test three variants of this approach: one using an optimized best-fit network, another using sampling to infer a high-confidence subnetwork, and one using a modular network inferred from clusters of similarly expressed genes. Application to lung cancer and breast cancer microarray data sets shows small improvements in phylogenies when correcting from the optimized network and more substantial improvements when correcting from the sampled or modular networks. Our results suggest that a network correction approach improves estimates of tumor similarity, but sophisticated network models are needed to control for the large hypothesis space and sparse data currently available.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "Biology and genetics; graphs and networks; machine learning; trees", } @Article{Zhu:2009:GGA, author = "Qian Zhu and Zaky Adam and Vicky Choi and David Sankoff", title = "Generalized Gene Adjacencies, Graph Bandwidth, and Clusters in Yeast Evolution", journal = j-TCBB, volume = "6", number = "2", pages = "213--220", month = apr, year = "2009", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.121", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 1 17:03:22 MDT 2009", bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We present a parameterized definition of gene clusters that allows us to control the emphasis placed on conserved order within a cluster. Though motivated by biological rather than mathematical considerations, this parameter turns out to be closely related to the bandwidth parameter of a graph. Our focus will be on how this parameter affects the characteristics of clusters: how numerous they are, how large they are, how rearranged they are, and to what extent they are preserved from ancestor to descendant in a phylogenetic tree. We infer the latter property by dynamic programming optimization of the presence of individual edges at the ancestral nodes of the phylogeny. We apply our analysis to a set of genomes drawn from the Yeast Gene Order Browser.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "Ashbya gossypii; Candida glabrata; Comparative genomics; dynamic programming; evolution; gene clusters; genome rearrangements; graph bandwidth; Kluyveromyces lactis; Kluyveromyces waltii; phylogeny; Saccharomyces cerevisiae; yeast", } @Article{Bansal:2009:GDP, author = "Mukul S. Bansal and Oliver Eulenstein and Andr{\'e} Wehe", title = "The Gene-Duplication Problem: Near-Linear Time Algorithms for {NNI}-Based Local Searches", journal = j-TCBB, volume = "6", number = "2", pages = "221--231", month = apr, year = "2009", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.7", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 1 17:03:22 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The gene-duplication problem is to infer a species supertree from a collection of gene trees that are confounded by complex histories of gene-duplication events.
This problem is NP-complete and thus requires efficient and effective heuristics. Existing heuristics perform a stepwise search of the tree space, where each step is guided by an exact solution to an instance of a local search problem. A classical local search problem is the {\tt NNI} search problem, which is based on the nearest neighbor interchange operation. In this work, we (1) provide a novel near-linear time algorithm for the {\tt NNI} search problem, (2) introduce extensions that significantly enlarge the search space of the {\tt NNI} search problem, and (3) present algorithms for these extended versions that are asymptotically just as efficient as our algorithm for the {\tt NNI} search problem. The exceptional speedup achieved in the extended {\tt NNI} search problems makes the gene-duplication problem more tractable for large-scale phylogenetic analyses. We verify the performance of our algorithms in a comparison study using sets of large randomly generated gene trees.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "Computational phylogenetics; gene-duplication; local search; supertrees; {\tt NNI}.", } @Article{Sun:2009:DPP, author = "Yanni Sun and Jeremy Buhler", title = "Designing Patterns and Profiles for Faster {HMM} Search", journal = j-TCBB, volume = "6", number = "2", pages = "232--243", month = apr, year = "2009", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.14", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 1 17:03:22 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Profile HMMs are powerful tools for modeling conserved motifs in proteins. They are widely used by search tools to classify new protein sequences into families based on domain architecture. 
However, the proliferation of known motifs and new proteomic sequence data poses a computational challenge for search, requiring days of CPU time to annotate an organism's proteome. It is highly desirable to speed up HMM search in large databases. We design PROSITE-like patterns and short profiles that are used as filters to rapidly eliminate protein-motif pairs for which a full profile HMM comparison does not yield a significant match. The design of the pattern-based filters is formulated as a multichoice knapsack problem. Profile-based filters with high sensitivity are extracted from a profile HMM based on their theoretical sensitivity and false positive rate. Experiments show that our profile-based filters achieve high sensitivity (near 100 percent) while keeping around $ 20 \times $ speedup with respect to the unfiltered search program. Pattern-based filters typically retain at least 90 percent of the sensitivity of the source HMM with $ 30 $--$ 40 \times $ speedup. The profile-based filters have sensitivity comparable to the multistage filtering strategy HMMERHEAD [15] and are faster in most of our experiments.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "bioinformatics databases; Biology and genetics; hidden Markov models; sequence similarity search", } @Article{Shaik:2009:FAS, author = "Jahangheer Shaik and Mohammed Yeasin", title = "Fuzzy-Adaptive-Subspace-Iteration-Based Two-Way Clustering of Microarray Data", journal = j-TCBB, volume = "6", number = "2", pages = "244--259", month = apr, year = "2009", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.15", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 1 17:03:22 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This paper presents 
Fuzzy-Adaptive-Subspace-Iteration-based Two-way Clustering (FASIC) of microarray data for finding differentially expressed genes (DEGs) from two-sample microarray experiments. The concept of fuzzy membership is introduced to transform the hard adaptive subspace iteration (ASI) algorithm into a fuzzy-ASI algorithm to perform two-way clustering. The proposed approach follows a progressive framework to assign a relevance value to genes associated with each cluster. Subsequently, each gene cluster is scored and ranked based on its potential to provide a correct classification of the sample classes. These ranks are converted into $P$ values using the $R$-test, and the significance of each gene is determined. A fivefold validation is performed on the DEGs selected using the proposed approach. Empirical analyses on a number of simulated microarray data sets are conducted to quantify the results obtained using the proposed approach. To exemplify the efficacy of the proposed approach, further analyses on different real microarray data sets are also performed.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "classification and association rules; Clustering; data and knowledge visualization; data mining; feature extraction or construction.", } @Article{Vignes:2009:GCI, author = "Matthieu Vignes and Florence Forbes", title = "Gene Clustering via Integrated {Markov} Models Combining Individual and Pairwise Features", journal = j-TCBB, volume = "6", number = "2", pages = "260--270", month = apr, year = "2009", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2007.70248", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 1 17:03:22 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Clustering of genes into groups sharing common characteristics 
is a useful exploratory technique for a number of subsequent computational analysis. A wide range of clustering algorithms have been proposed in particular to analyze gene expression data, but most of them consider genes as independent entities or include relevant information on gene interactions in a suboptimal way. We propose a probabilistic model that has the advantage to account for individual data (e.g., expression) and pairwise data (e.g., interaction information coming from biological networks) simultaneously. Our model is based on hidden Markov random field models in which parametric probability distributions account for the distribution of individual data. Data on pairs, possibly reflecting distance or similarity measures between genes, are then included through a graph, where the nodes represent the genes, and the edges are weighted according to the available interaction information. As a probabilistic model, this model has many interesting theoretical features. In addition, preliminary experiments on simulated and real data show promising results and points out the gain in using such an approach. Availability: The software used in this work is written in C++ and is available with other supplementary material at \path=http://mistis.inrialpes.fr/people/forbes/transparentia/supplementary.html=.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "gene expression; Markov random fields; metabolic networks; model-based clustering", } @Article{Heath:2009:SMN, author = "Lenwood S. Heath and Allan A. 
Sioson", title = "Semantics of Multimodal Network Models", journal = j-TCBB, volume = "6", number = "2", pages = "271--280", month = apr, year = "2009", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2007.70242", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 1 17:03:22 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A multimodal network (MMN) is a novel graph-theoretic formalism designed to capture the structure of biological networks and to represent relationships derived from multiple biological databases. MMNs generalize the standard notions of graphs and hypergraphs, which are the bases of current diagrammatic representations of biological phenomena, and incorporate the concept of mode. Each vertex of an MMN is a biological entity, a biot, while each modal hyperedge is a typed relationship, where the type is given by the mode of the hyperedge. The semantics of each modal hyperedge $e$ is given through denotational semantics, where a valuation function $ f_{e} $ defines the relationship among the values of the vertices incident on $e$. The meaning of an MMN is denoted in terms of the semantics of a hyperedge sequence. A companion paper defines MMNs and concentrates on the structural aspects of MMNs. 
This paper develops MMN denotational semantics when used as a representation of the semantics of biological networks and discusses applications of MMNs in managing complex biological data.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "biological model; biological networks; biot; denotational semantics; graph; hypergraph; mode; Multimodal network", } @Article{Arribas-Gil:2009:SAS, author = "Ana Arribas-Gil and Dirk Metzler and Jean-Louis Plouhinec", title = "Statistical Alignment with a Sequence Evolution Model Allowing Rate Heterogeneity along the Sequence", journal = j-TCBB, volume = "6", number = "2", pages = "281--295", month = apr, year = "2009", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2007.70246", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 1 17:03:22 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We present a stochastic sequence evolution model to obtain alignments and estimate mutation rates between two homologous sequences. The model allows two possible evolutionary behaviors along a DNA sequence in order to determine conserved regions and take its heterogeneity into account. In our model, the sequence is divided into slow and fast evolution regions. The boundaries between these sections are not known. It is our aim to detect them. The evolution model is based on a fragment insertion and deletion process working on fast regions only and on a substitution process working on fast and slow regions with different rates. This model induces a pair hidden Markov structure at the level of alignments, thus making efficient statistical alignment algorithms possible. 
We propose two complementary estimation methods, namely, a Gibbs sampler for Bayesian estimation and a stochastic version of the EM algorithm for maximum likelihood estimation. Both algorithms involve the sampling of alignments. We propose a partial alignment sampler, which is computationally less expensive than the typical whole alignment sampler. We show the convergence of the two estimation algorithms when used with this partial sampler. Our algorithms provide consistent estimates for the mutation rates and plausible alignments and sequence segmentations on both simulated and real data.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "biology and genetics; Markov processes; mathematics and statistics; probabilistic algorithms; sequence evolution", } @Article{Weber:2009:VET, author = "Gunther H. Weber and Oliver R{\"u}bel and Min-Yu Huang and Angela H. DePace and Charless C. Fowlkes and Soile V. E. Ker{\"a}nen and Cris L. {Luengo Hendriks} and Hans Hagen and David W. Knowles and Jitendra Malik and Mark D. Biggin and Bernd Hamann", title = "Visual Exploration of Three-Dimensional Gene Expression Using Physical Views and Linked Abstract Views", journal = j-TCBB, volume = "6", number = "2", pages = "296--309", month = apr, year = "2009", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2007.70249", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 1 17:03:22 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "During animal development, complex patterns of gene expression provide positional information within the embryo. 
To better understand the underlying gene regulatory networks, the Berkeley Drosophila Transcription Network Project (BDTNP) has developed methods that support quantitative computational analysis of three-dimensional (3D) gene expression in early Drosophila embryos at cellular resolution. We introduce PointCloudXplore (PCX), an interactive visualization tool that supports visual exploration of relationships between different genes' expression using a combination of established visualization techniques. Two aspects of gene expression are of particular interest: (1) gene expression patterns defined by the spatial locations of cells expressing a gene and (2) relationships between the expression levels of multiple genes. PCX provides users with two corresponding classes of data views: (1) Physical Views based on the spatial relationships of cells in the embryo and (2) Abstract Views that discard spatial information and plot expression levels of multiple genes with respect to each other. Cell Selectors highlight data associated with subsets of embryo cells within a View. Using linking, these selected cells can be viewed in multiple representations. We describe PCX as a 3D gene expression visualization tool and provide examples of how it has been used by BDTNP biologists to generate new hypotheses.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "brushing; information visualization; Interactive data exploration; multiple linked views; physical views; scatter plots; spatial expression patterns; three-dimensional gene expression; visualization", } @Article{Dougherty:2009:CBM, author = "Edward R. Dougherty and Marcel Brun and Jeffrey M. Trent and Michael L. 
Bittner", title = "Conditioning-Based Modeling of Contextual Genomic Regulation", journal = j-TCBB, volume = "6", number = "2", pages = "310--320", month = apr, year = "2009", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2007.70247", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 1 17:03:22 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A more complete understanding of the alterations in cellular regulatory and control mechanisms that occur in the various forms of cancer has been one of the central targets of the genomic and proteomic methods that allow surveys of the abundance and/or state of cellular macromolecules. This preference is driven both by the intractability of cancer to generic therapies, assumed to be due to the highly varied molecular etiologies observed in cancer, and by the opportunity to discern and dissect the regulatory and control interactions presented by the highly diverse assortment of perturbations of regulation and control that arise in cancer. Exploiting the opportunities for inference on the regulatory and control connections offered by these revealing system perturbations is fraught with the practical problems that arise from the way biological systems operate. Two classes of regulatory action in biological systems are particularly inimical to inference, convergent regulation, where a variety of regulatory actions result in a common set of control responses (crosstalk), and divergent regulation, where a single regulatory action produces entirely different sets of control responses, depending on cellular context (conditioning). 
We have constructed a coarse mathematical model of the propagation of regulatory influence in such distributed, context-sensitive regulatory networks that allows a quantitative estimation of the amount of crosstalk and conditioning associated with a candidate regulatory gene taken from a set of genes that have been profiled over a series of samples where the candidate's activity varies.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "Microarray; regulatory networks.", } @Article{Heath:2009:MNS, author = "Lenwood S. Heath and Allan A. Sioson", title = "Multimodal Networks: Structure and Operations", journal = j-TCBB, volume = "6", number = "2", pages = "321--332", month = apr, year = "2009", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2007.70243", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 1 17:03:22 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A multimodal network (MMN) is a novel graph-theoretic formalism designed to capture the structure of biological networks and to represent relationships derived from multiple biological databases. MMNs generalize the standard notions of graphs and hypergraphs, which are the bases of current diagrammatic representations of biological phenomena and incorporate the concept of mode. Each vertex of an MMN is a biological entity, a biot, while each modal hyperedge is a typed relationship, where the type is given by the mode of the hyperedge. The current paper defines MMNs and concentrates on the structural aspects of MMNs. A companion paper develops MMNs as a representation of the semantics of biological networks and discusses applications of the MMNs in managing complex biological data. 
The MMN model has been implemented in a database system containing multiple kinds of biological networks.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "biological networks; biot; graph; hypergraph; mode; Multimodal network", } @Article{Yukinawa:2009:OAB, author = "Naoto Yukinawa and Shigeyuki Oba and Kikuya Kato and Shin Ishii", title = "Optimal Aggregation of Binary Classifiers for Multiclass Cancer Diagnosis Using Gene Expression Profiles", journal = j-TCBB, volume = "6", number = "2", pages = "333--343", month = apr, year = "2009", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2007.70239", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 1 17:03:22 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Multiclass classification is one of the fundamental tasks in bioinformatics and typically arises in cancer diagnosis studies by gene expression profiling. There have been many studies of aggregating binary classifiers to construct a multiclass classifier based on one-versus-the-rest (1R), one-versus-one (11), or other coding strategies, as well as some comparison studies between them. However, the studies found that the best coding depends on each situation. Therefore, a new problem, which we call the ``optimal coding problem,'' has arisen: how can we determine which coding is the optimal one in each situation? To approach this optimal coding problem, we propose a novel framework for constructing a multiclass classifier, in which each binary classifier to be aggregated has a weight value to be optimally tuned based on the observed data. Although there is no a priori answer to the optimal coding problem, our weight tuning method can be a consistent answer to the problem. 
We apply this method to various classification problems including a synthesized data set and some cancer diagnosis data sets from gene expression profiling. The results demonstrate that, in most situations, our method can improve classification accuracy over simple voting heuristics and is better than or comparable to state-of-the-art multiclass predictors.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "cancer diagnosis; error correcting output coding; gene expression profiling; Multiclass classification", } @Article{Olman:2009:PCA, author = "Victor Olman and Fenglou Mao and Hongwei Wu and Ying Xu", title = "Parallel Clustering Algorithm for Large Data Sets with Applications in Bioinformatics", journal = j-TCBB, volume = "6", number = "2", pages = "344--352", month = apr, year = "2009", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2007.70272", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 1 17:03:22 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Large sets of bioinformatical data provide a challenge in time consumption while solving the cluster identification problem, and that is why a parallel algorithm is so needed for identifying dense clusters in a noisy background. Our algorithm works on a graph representation of the data set to be analyzed. It identifies clusters through the identification of densely intraconnected subgraphs. We have employed a minimum spanning tree (MST) representation of the graph and solve the cluster identification problem using this representation. The computational bottleneck of our algorithm is the construction of an MST of a graph, for which a parallel algorithm is employed. 
Our high-level strategy for the parallel MST construction algorithm is to first partition the graph, then construct MSTs for the partitioned subgraphs and auxiliary bipartite graphs based on the subgraphs, and finally merge these MSTs to derive an MST of the original graph. The computational results indicate that when running on 150 CPUs, our algorithm can solve a cluster identification problem on a data set with 1,000,000 data points almost 100 times faster than on single CPU, indicating that this program is capable of handling very large data clustering problems in an efficient manner. We have implemented the clustering algorithm as the software CLUMP.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "clustering algorithm; genome application; parallel processing; Pattern recognition", } @Article{Paul:2009:PCC, author = "Topon Kumar Paul and Hitoshi Iba", title = "Prediction of Cancer Class with Majority Voting Genetic Programming Classifier Using Gene Expression Data", journal = j-TCBB, volume = "6", number = "2", pages = "353--367", month = apr, year = "2009", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2007.70245", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 1 17:03:22 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In order to get a better understanding of different types of cancers and to find the possible biomarkers for diseases, recently, many researchers are analyzing the gene expression data using various machine learning techniques. However, due to a very small number of training samples compared to the huge number of genes and class imbalance, most of these methods suffer from overfitting. 
In this paper, we present a majority voting genetic programming classifier (MVGPC) for the classification of microarray data. Instead of a single rule or a single set of rules, we evolve multiple rules with genetic programming (GP) and then apply those rules to test samples to determine their labels with majority voting technique. By performing experiments on four different public cancer data sets, including multiclass data sets, we have found that the test accuracies of MVGPC are better than those of other methods, including AdaBoost with GP. Moreover, some of the more frequently occurring genes in the classification rules are known to be associated with the types of cancers being studied in this paper.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "Classifier design and evaluation; data mining; evolutionary computing and genetic algorithm; feature extraction; gene expression; majority voting.", } @Article{Sagot:2009:EEI, author = "Marie-France Sagot", title = "{EIC} Editorial: Introducing New {Associate Editors}", journal = j-TCBB, volume = "6", number = "3", pages = "369--369", month = jul, year = "2009", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.66", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Aug 11 18:13:22 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Bi:2009:MCE, author = "Chengpeng Bi", title = "A {Monte Carlo} {EM} Algorithm for {De Novo Motif} Discovery in Biomolecular Sequences", journal = j-TCBB, volume = "6", number = "3", pages = "370--386", month = jul, year = "2009", CODEN = "ITCBCY", DOI = 
"https://doi.org/10.1109/TCBB.2008.103", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Aug 11 18:13:22 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Motif discovery methods play pivotal roles in deciphering the genetic regulatory codes (i.e., motifs) in genomes as well as in locating conserved domains in protein sequences. The Expectation Maximization (EM) algorithm is one of the most popular methods used in de novo motif discovery. Based on the position weight matrix (PWM) updating technique, this paper presents a Monte Carlo version of the EM motif-finding algorithm that carries out stochastic sampling in local alignment space to overcome the conventional EM's main drawback of being trapped in a local optimum. The newly implemented algorithm is named as Monte Carlo EM Motif Discovery Algorithm (MCEMDA). MCEMDA starts from an initial model, and then it iteratively performs Monte Carlo simulation and parameter update until convergence. A log-likelihood profiling technique together with the top-$k$ strategy is introduced to cope with the phase shifts and multiple modal issues in motif discovery problem. A novel grouping motif alignment (GMA) algorithm is designed to select motifs by clustering a population of candidate local alignments and successfully applied to subtle motif discovery. MCEMDA compares favorably to other popular PWM-based and word enumerative motif algorithms tested using simulated $ (l, d) $-motif cases, documented prokaryotic, and eukaryotic DNA motif sequences. 
Finally, MCEMDA is applied to detect large blocks of conserved domains using protein benchmarks and exhibits its excellent capacity while compared with other multiple sequence alignment methods.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "Expectation maximization (EM); Monte Carlo EM; motif discovery; multiple sequence alignment; transcriptional regulation.", } @Article{Stoye:2009:UAR, author = "Jens Stoye and Roland Wittler", title = "A Unified Approach for Reconstructing Ancient Gene Clusters", journal = j-TCBB, volume = "6", number = "3", pages = "387--400", month = jul, year = "2009", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.135", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Aug 11 18:13:22 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The order of genes in genomes provides extensive information. In comparative genomics, differences or similarities of gene orders are determined to predict functional relations of genes or phylogenetic relations of genomes. For this purpose, various combinatorial models can be used to identify gene clusters --- groups of genes that are colocated in a set of genomes. We introduce a unified approach to model gene clusters and define the problem of labeling the inner nodes of a given phylogenetic tree with sets of gene clusters. Our optimization criterion in this context combines two properties: parsimony, i.e., the number of gains and losses of gene clusters has to be minimal, and consistency, i.e., for each ancestral node, there must exist at least one potential gene order that contains all the reconstructed clusters. We present and evaluate an exact algorithm to solve this problem. 
Despite its exponential worst-case time complexity, our method is suitable even for large-scale data. We show the effectiveness and efficiency on both simulated and real data.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "Comparative genomics; consistency; gene cluster; gene cluster reconstruction; gene order; parsimony; phylogeny", } @Article{Chen:2009:AAM, author = "Xin Chen and Yun Cui", title = "An Approximation Algorithm for the Minimum Breakpoint Linearization Problem", journal = j-TCBB, volume = "6", number = "3", pages = "401--409", month = jul, year = "2009", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.3", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Aug 11 18:13:22 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In the recent years, there has been a growing interest in inferring the total order of genes or markers on a chromosome, since current genetic mapping efforts might only suffice to produce a partial order. Many interesting optimization problems were thus formulated in the framework of genome rearrangement. As an important one among them, the minimum breakpoint linearization (MBL) problem is to find the total order of a partially ordered genome that minimizes its breakpoint distance to a reference genome whose genes are already totally ordered. It was previously shown to be NP-hard, and the algorithms proposed so far are all heuristic. 
In this paper, we present an $ m^2 + m \over 2 $-approximation algorithm for the MBL problem, where $m$ is the number of gene maps that are combined together to form a partial order of the genome under investigation.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "approximation algorithms; breakpoint distance; Comparative genomics; partially ordered genomes", } @Article{Wang:2009:EKF, author = "Zidong Wang and Xiaohui Liu and Yurong Liu and Jinling Liang and Veronica Vinciotti", title = "An Extended {Kalman} Filtering Approach to Modeling Nonlinear Dynamic Gene Regulatory Networks via Short Gene Expression Time Series", journal = j-TCBB, volume = "6", number = "3", pages = "410--419", month = jul, year = "2009", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.5", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Aug 11 18:13:22 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In this paper, the extended Kalman filter (EKF) algorithm is applied to model the gene regulatory network from gene time series data. The gene regulatory network is considered as a nonlinear dynamic stochastic model that consists of the gene measurement equation and the gene regulation equation. After specifying the model structure, we apply the EKF algorithm for identifying both the model parameters and the actual value of gene expression levels. It is shown that the EKF algorithm is an online estimation algorithm that can identify a large number of parameters (including parameters of nonlinear functions) through iterative procedure by using a small number of observations. 
Four real-world gene expression data sets are employed to demonstrate the effectiveness of the EKF algorithm, and the obtained models are evaluated from the viewpoint of bioinformatics.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
  keywords =     "clustering; DNA microarray technology; extended Kalman filtering; gene expression; Modeling; time series data",
}

@Article{Bryant:2009:CDT,
  author =       "David Bryant and Mike Steel",
  title =        "Computing the Distribution of a Tree Metric",
  journal =      j-TCBB,
  volume =       "6",
  number =       "3",
  pages =        "420--426",
  month =        jul,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2009.32",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Tue Aug 11 18:13:22 MDT 2009",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "The Robinson--Foulds (RF) distance is by far the most widely used measure of dissimilarity between trees. Although the distribution of these distances has been investigated for 20 years, an algorithm that is explicitly polynomial time has yet to be described for computing the distribution for trees around a given tree. In this paper, we derive a polynomial-time algorithm for this distribution. We show how the distribution can be approximated by a Poisson distribution determined by the proportion of leaves that lie in ``cherries'' of the given tree.
We also describe how our results can be used to derive normalization constants that are required in a recently proposed maximum likelihood approach to supertree construction.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
  keywords =     "Biology and genetics; discrete mathematics applications; normalization constant; phylogenetics; Poisson approximation; Robinson--Foulds distance; trees",
}

@Article{Hulsman:2009:EOK,
  author =       "Marc Hulsman and Marcel J. T. Reinders and Dick de Ridder",
  title =        "Evolutionary Optimization of Kernel Weights Improves Protein Complex Comembership Prediction",
  journal =      j-TCBB,
  volume =       "6",
  number =       "3",
  pages =        "427--437",
  month =        jul,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2008.137",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Tue Aug 11 18:13:22 MDT 2009",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "In recent years, more and more high-throughput data sources useful for protein complex prediction have become available (e.g., gene sequence, mRNA expression, and interactions). The integration of these different data sources can be challenging. Recently, it has been recognized that kernel-based classifiers are well suited for this task. However, the different kernels (data sources) are often combined using equal weights. Although several methods have been developed to optimize kernel weights, no large-scale example of an improvement in classifier performance has been shown yet. In this work, we employ an evolutionary algorithm to determine weights for a larger set of kernels by optimizing a criterion based on the area under the ROC curve. We show that setting the right kernel weights can indeed improve performance.
We compare this to the existing kernel weight optimization methods (i.e., (regularized) optimization of the SVM criterion or aligning the kernel with an ideal kernel) and find that these do not result in a significant performance improvement and can even cause a decrease in performance. Results also show that an expert approach of assigning high weights to features with high individual performance is not necessarily the best strategy.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
  keywords =     "biology and genetics; Classifier design and evaluation; evolutionary computing and genetic algorithms",
}

@Article{Chen:2009:IAA,
  author =       "Zhi-Zhong Chen and Lusheng Wang",
  title =        "Improved Approximation Algorithms for Reconstructing the History of Tandem Repeats",
  journal =      j-TCBB,
  volume =       "6",
  number =       "3",
  pages =        "438--453",
  month =        jul,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2008.122",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Tue Aug 11 18:13:22 MDT 2009",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Some genetic diseases in human beings are dominated by short sequences repeated consecutively called tandem repeats. Once a region containing tandem repeats is found, it is of great interest to study the history of creating the repeats. The computational problem of reconstructing the duplication history of tandem repeats has been studied extensively in the literature. Almost all previous studies focused on the simplest case where the size of each duplication block is 1.
Only recently we succeeded in giving the first polynomial-time approximation algorithm with a guaranteed ratio for a more general case where the size of each duplication block is at most $2$; the algorithm achieves a ratio of $6$ and runs in $ O(n^{11}) $ time. In this paper, we present two new polynomial-time approximation algorithms for this more general case. One of them achieves a ratio of $5$ and runs in $ O(n^9) $ time, while the other achieves a ratio of $ 2.5 + \epsilon $ for any constant $ \epsilon > 0 $ but runs slower.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
  keywords =     "approximation algorithms; Computational biology",
}

@Article{Cardona:2009:MPNb,
  author =       "Gabriel Cardona and Merc{\`e} Llabr{\'e}s and Francesc Rossell{\'o} and Gabriel Valiente",
  title =        "Metrics for Phylogenetic Networks {II}: Nodal and Triplets Metrics",
  journal =      j-TCBB,
  volume =       "6",
  number =       "3",
  pages =        "454--469",
  month =        jul,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2008.127",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Tue Aug 11 18:13:22 MDT 2009",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "The assessment of phylogenetic network reconstruction methods requires the ability to compare phylogenetic networks. This is the second in a series of papers devoted to the analysis and comparison of metrics for tree-child time consistent phylogenetic networks on the same set of taxa. In this paper, we generalize to phylogenetic networks two metrics that have already been introduced in the literature for phylogenetic trees: the nodal distance and the triplets distance. We prove that they are metrics on any class of tree-child time consistent phylogenetic networks on the same set of taxa, as well as some basic properties for them.
To prove these results, we introduce a reduction/expansion procedure that can be used not only to establish properties of tree-child time consistent phylogenetic networks by induction, but also to generate all tree-child time consistent phylogenetic networks with a given number of leaves.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
  keywords =     "nodal distance; partition distance; Phylogenetic network; temporal representation; time consistency; tree-child phylogenetic network; triplets distance",
}

@Article{Sotiropoulos:2009:MRM,
  author =       "Vassilios Sotiropoulos and Marie-Nathalie Contou-Carrere and Prodromos Daoutidis and Yiannis N. Kaznessis",
  title =        "Model Reduction of Multiscale Chemical {Langevin} Equations: a Numerical Case Study",
  journal =      j-TCBB,
  volume =       "6",
  number =       "3",
  pages =        "470--482",
  month =        jul,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2009.23",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Tue Aug 11 18:13:22 MDT 2009",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Two very important characteristics of biological reaction networks need to be considered carefully when modeling these systems. First, models must account for the inherent probabilistic nature of systems far from the thermodynamic limit. Often, biological systems cannot be modeled with traditional continuous-deterministic models. Second, models must take into consideration the disparate spectrum of time scales observed in biological phenomena, such as slow transcription events and fast dimerization reactions.
In the last decade, significant efforts have been expended on the development of stochastic chemical kinetics models to capture the dynamics of biomolecular systems, and on the development of robust multiscale algorithms, able to handle stiffness. In this paper, the focus is on the dynamics of reaction sets governed by stiff chemical Langevin equations, i.e., stiff stochastic differential equations. These are particularly challenging systems to model, requiring prohibitively small integration step sizes. We describe and illustrate the application of a semianalytical reduction framework for chemical Langevin equations that results in significant gains in computational cost.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
  keywords =     "chemical Langevin equations (CLEs); Model reduction; multiscale models; stiff biomolecular systems; stochastic chemical kinetics",
}

@Article{Roytberg:2009:SSP,
  author =       "Mikhail Roytberg and Anna Gambin and Laurent No{\'e} and S{\l}awomir Lasota and Eugenia Furletova and Ewa Szczurek and Gregory Kucherov",
  title =        "On Subset Seeds for Protein Alignment",
  journal =      j-TCBB,
  volume =       "6",
  number =       "3",
  pages =        "483--494",
  month =        jul,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2009.4",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Tue Aug 11 18:13:22 MDT 2009",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "We apply the concept of subset seeds proposed in [1] to similarity search in protein sequences. The main question studied is the design of efficient seed alphabets to construct seeds with optimal sensitivity/selectivity trade-offs. We propose several different design methods and use them to construct several alphabets.
We then perform a comparative analysis of seeds built over those alphabets and compare them with the standard Blastp seeding method [2], [3], as well as with the family of vector seeds proposed in [4]. While the formalism of subset seeds is less expressive (but less costly to implement) than the cumulative principle used in Blastp and vector seeds, our seeds show a similar or even better performance than Blastp on Bernoulli models of proteins compatible with the common BLOSUM62 matrix. Finally, we perform a large-scale benchmarking of our seeds against several main databases of protein alignments. Here again, the results show a comparable or better performance of our seeds versus Blastp.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
  keywords =     "local alignment; multiple seeds; protein databases; Protein sequences; seed alphabet; seeds; selectivity; sensitivity; similarity search; subset seeds",
}

@Article{Jin:2009:PSP,
  author =       "Guohua Jin and Luay Nakhleh and Sagi Snir and Tamir Tuller",
  title =        "Parsimony Score of Phylogenetic Networks: Hardness Results and a Linear-Time Heuristic",
  journal =      j-TCBB,
  volume =       "6",
  number =       "3",
  pages =        "495--505",
  month =        jul,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2008.119",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Tue Aug 11 18:13:22 MDT 2009",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Phylogenies --- the evolutionary histories of groups of organisms --- play a major role in representing the interrelationships among biological entities. Many methods for reconstructing and studying such phylogenies have been proposed, almost all of which assume that the underlying history of a given set of species can be represented by a binary tree.
Although many biological processes can be effectively modeled and summarized in this fashion, others cannot: recombination, hybrid speciation, and horizontal gene transfer result in networks of relationships rather than trees of relationships. In previous works, we formulated a maximum parsimony (MP) criterion for reconstructing and evaluating phylogenetic networks, and demonstrated its quality on biological as well as synthetic data sets. In this paper, we provide further theoretical results as well as a very fast heuristic algorithm for the MP criterion of phylogenetic networks. In particular, we provide a novel combinatorial definition of phylogenetic networks in terms of ``forbidden cycles,'' and provide detailed hardness and hardness of approximation proofs for the ``small'' MP problem. We demonstrate the performance of our heuristic in terms of time and accuracy on both biological and synthetic data sets. Finally, we explain the difference between our model and a similar one formulated by Nguyen et al., and describe the implications of this difference on the hardness and approximation results.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
  keywords =     "hardness and approximation; horizontal gene transfer; Maximum parsimony; phylogenetic networks",
}

@Article{Thomas:2009:PDS,
  author =       "John Thomas and Naren Ramakrishnan and Chris Bailey-Kellogg",
  title =        "Protein Design by Sampling an Undirected Graphical Model of Residue Constraints",
  journal =      j-TCBB,
  volume =       "6",
  number =       "3",
  pages =        "506--516",
  month =        jul,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2008.124",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Tue Aug 11 18:13:22 MDT 2009",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "This paper develops an approach for
designing protein variants by sampling sequences that satisfy residue constraints encoded in an undirected probabilistic graphical model. Due to evolutionary pressures on proteins to maintain structure and function, the sequence record of a protein family contains valuable information regarding position-specific residue conservation and coupling (or covariation) constraints. Representing these constraints with a graphical model provides two key benefits for protein design: a probabilistic semantics enabling evaluation of possible sequences for consistency with the constraints, and an explicit factorization of residue dependence and independence supporting efficient exploration of the constrained sequence space. We leverage these benefits in developing two complementary MCMC algorithms for protein design: constrained shuffling mixes wild-type sequences positionwise and evaluates graphical model likelihood, while component sampling directly generates sequences by sampling clique values and propagating to other cliques. We apply our methods to design WW domains. We demonstrate that likelihood under a model of wild-type WWs is highly predictive of foldedness of new WWs. We then show both theoretical and rapid empirical convergence of our algorithms in generating high-likelihood, diverse new sequences. We further show that these sequences capture the original sequence constraints, yielding a model as predictive of foldedness as the original one.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
  keywords =     "graphical models; Markov chain Monte Carlo (MCMC); Protein design; residue coupling",
}

@Article{Smith:2009:RSD,
  author =       "Jennifer A.
Smith",
  title =        "{RNA} Search with Decision Trees and Partial Covariance Models",
  journal =      j-TCBB,
  volume =       "6",
  number =       "3",
  pages =        "517--527",
  month =        jul,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2008.120",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Tue Aug 11 18:13:22 MDT 2009",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "The use of partial covariance models to search for RNA family members in genomic sequence databases is explored. The partial models are formed from contiguous subranges of the overall RNA family multiple alignment columns. A binary decision-tree framework is presented for choosing the order to apply the partial models and the score thresholds on which to make the decisions. The decision trees are chosen to minimize computation time subject to the constraint that all of the training sequences are passed to the full covariance model for final evaluation. Computational intelligence methods are suggested to select the decision tree since the tree can be quite complex and there is no obvious method to build the tree in these cases. Experimental results from seven RNA families shows execution times of 0.066--0.268 relative to using the full covariance model alone. Tests on the full sets of known sequences for each family show that at least 95 percent of these sequences are found for two families and 100 percent for five others.
Since the full covariance model is run on all sequences accepted by the partial model decision tree, the false alarm rate is at least as low as that of the full model alone.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
  keywords =     "Bioinformatics; computational intelligence; covariance models; decision trees; RNA database search",
}

@Article{Chen:2009:SCP,
  author =       "Jie Chen and Yu-Ping Wang",
  title =        "A Statistical Change Point Model Approach for the Detection of {DNA} Copy Number Variations in Array {CGH} Data",
  journal =      j-TCBB,
  volume =       "6",
  number =       "4",
  pages =        "529--541",
  month =        oct,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2008.129",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Oct 1 16:16:42 MDT 2015",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Array comparative genomic hybridization (aCGH) provides a high-resolution and high-throughput technique for screening of copy number variations (CNVs) within the entire genome. This technique, compared to the conventional CGH, significantly improves the identification of chromosomal abnormalities. However, due to the random noise inherited in the imaging and hybridization process, identifying statistically significant DNA copy number changes in aCGH data is challenging. We propose a novel approach that uses the mean and variance change point model (MVCM) to detect CNVs or breakpoints in aCGH data sets. We derive an approximate p-value for the test statistic and also give the estimate of the locus of the DNA copy number change. We carry out simulation studies to evaluate the accuracy of the estimate and the p-value formulation. These simulation results show that the approach is effective in identifying copy number changes.
The approach is also tested on fibroblast cancer cell line data, breast tumor cell line data, and breast cancer cell line aCGH data sets that are publicly available. Changes that have not been identified by the circular binary segmentation (CBS) method but are biologically verified are detected by our approach on these cell lines with higher sensitivity and specificity than CBS.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Prakash:2009:ADM,
  author =       "Amol Prakash and Martin Tompa",
  title =        "Assessing the Discordance of Multiple Sequence Alignments",
  journal =      j-TCBB,
  volume =       "6",
  number =       "4",
  pages =        "542--551",
  month =        oct,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2007.70271",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Oct 1 16:16:42 MDT 2015",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Multiple sequence alignments have wide applicability in many areas of computational biology, including comparative genomics, functional annotation of proteins, gene finding, and modeling evolutionary processes. Because of the computational difficulty of multiple sequence alignment and the availability of numerous tools, it is critical to be able to assess the reliability of multiple alignments. We present a tool called StatSigMA to assess whether multiple alignments of nucleotide or amino acid sequences are contaminated with one or more unrelated sequences. There are numerous applications for which StatSigMA can be used. Two such applications are to distinguish homologous sequences from nonhomologous ones and to compare alignments produced by various multiple alignment tools.
We present examples of both types of applications.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Cardona:2009:CTC,
  author =       "Gabriel Cardona and Francesc Rossell{\'o} and Gabriel Valiente",
  title =        "Comparison of Tree-Child Phylogenetic Networks",
  journal =      j-TCBB,
  volume =       "6",
  number =       "4",
  pages =        "552--569",
  month =        oct,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2007.70270",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Oct 1 16:16:42 MDT 2015",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Phylogenetic networks are a generalization of phylogenetic trees that allow for the representation of nontreelike evolutionary events, like recombination, hybridization, or lateral gene transfer. While much progress has been made to find practical algorithms for reconstructing a phylogenetic network from a set of sequences, all attempts to endorse a class of phylogenetic networks (strictly extending the class of phylogenetic trees) with a well-founded distance measure have, to the best of our knowledge and with the only exception of the bipartition distance on regular networks, failed so far. In this paper, we present and study a new meaningful class of phylogenetic networks, called tree-child phylogenetic networks, and we provide an injective representation of these networks as multisets of vectors of natural numbers, their path multiplicity vectors. We then use this representation to define a distance on this class that extends the well-known Robinson--Foulds distance for phylogenetic trees and to give an alignment method for pairs of networks in this class.
Simple polynomial algorithms for reconstructing a tree-child phylogenetic network from its path multiplicity vectors, for computing the distance between two tree-child phylogenetic networks and for aligning a pair of tree-child phylogenetic networks, are provided. They have been implemented as a Perl package and a Java applet, which can be found at \url{http://bioinfo.uib.es/~recerca/phylonetworks/mudistance/}.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Hong:2009:HRD,
  author =       "Changjin Hong and Ahmed H. Tewfik",
  title =        "Heuristic Reusable Dynamic Programming: Efficient Updates of Local Sequence Alignment",
  journal =      j-TCBB,
  volume =       "6",
  number =       "4",
  pages =        "570--582",
  month =        oct,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2009.30",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Oct 1 16:16:42 MDT 2015",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Recomputation of the previously evaluated similarity results between biological sequences becomes inevitable when researchers realize errors in their sequenced data or when the researchers have to compare nearly similar sequences, e.g., in a family of proteins. We present an efficient scheme for updating local sequence alignments with an affine gap model. In principle, using the previous matching result between two amino acid sequences, we perform a forward-backward alignment to generate heuristic searching bands which are bounded by a set of suboptimal paths. Given a correctly updated sequence, we initially predict a new score of the alignment path for each contour to select the best candidates among them. Then, we run the Smith--Waterman algorithm in this confined space.
Furthermore, our heuristic alignment for an updated sequence shows that it can be further accelerated by using reusable dynamic programming (rDP), our prior work. In this study, we successfully validate ``relative node tolerance bound'' (RNTB) in the pruned searching space. Furthermore, we improve the computational performance by quantifying the successful RNTB tolerance probability and switch to rDP on perturbation-resilient columns only. In our searching space derived by a threshold value of 90 percent of the optimal alignment score, we find that 98.3 percent of contours contain correctly updated paths. We also find that our method consumes only 25.36 percent of the runtime cost of sparse dynamic programming (sDP) method, and to only 2.55 percent of that of a normal dynamic programming with the Smith--Waterman algorithm.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Wang:2009:EPS,
  author =       "Yong Wang and Ling-Yun Wu and Ji-Hong Zhang and Zhong-Wei Zhan and Xiang-Sun Zhang and Luonan Chen",
  title =        "Evaluating Protein Similarity from Coarse Structures",
  journal =      j-TCBB,
  volume =       "6",
  number =       "4",
  pages =        "583--593",
  month =        oct,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2007.70250",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Oct 1 16:16:42 MDT 2015",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "To unscramble the relationship between protein function and protein structure, it is essential to assess the protein similarity from different aspects. Although many methods have been proposed for protein structure alignment or comparison, alternative similarity measures are still strongly demanded due to the requirement of fast screening and query in large-scale structure databases.
In this paper, we first formulate a novel representation of a protein structure, i.e., Feature Sequence of Surface (FSS). Then, a new score scheme is developed to measure the similarity between two representations. To verify the proposed method, numerical experiments are conducted in four different protein data sets. We also classify SARS coronavirus to verify the effectiveness of the new method. Furthermore, preliminary results of fast classification of the whole CATH v2.5.1 database based on the new macrostructure similarity are given as a pilot study. We demonstrate that the proposed approach to measure the similarities between protein structures is simple to implement, computationally efficient, and surprisingly fast. In addition, the method itself provides a new and quantitative tool to view a protein structure.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Salicru:2009:ICA,
  author =       "Miquel Salicr{\'u} and Sergi Vives and Tian Zheng",
  title =        "Inferential Clustering Approach for Microarray Experiments with Replicated Measurements",
  journal =      j-TCBB,
  volume =       "6",
  number =       "4",
  pages =        "594--604",
  month =        oct,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2008.106",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Oct 1 16:16:42 MDT 2015",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Cluster analysis has proven to be a useful tool for investigating the association structure among genes in a microarray data set. There is a rich literature on cluster analysis and various techniques have been developed. Such analyses heavily depend on an appropriate (dis)similarity measure.
In this paper, we introduce a general clustering approach based on the confidence interval inferential methodology, which is applied to gene expression data of microarray experiments. Emphasis is placed on data with low replication (three or five replicates). The proposed method makes more efficient use of the measured data and avoids the subjective choice of a dissimilarity measure. This new methodology, when applied to real data, provides an easy-to-use bioinformatics solution for the cluster analysis of microarray experiments with replicates (see the Appendix). Even though the method is presented under the framework of microarray experiments, it is a general algorithm that can be used to identify clusters in any situation. The method's performance is evaluated using simulated and publicly available data set. Our results also clearly show that our method is not an extension of the conventional clustering method based on correlation or Euclidean distance.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Niijima:2009:LLD,
  author =       "Satoshi Niijima and Yasushi Okuno",
  title =        "{Laplacian} Linear Discriminant Analysis Approach to Unsupervised Feature Selection",
  journal =      j-TCBB,
  volume =       "6",
  number =       "4",
  pages =        "605--614",
  month =        oct,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2007.70257",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Oct 1 16:16:42 MDT 2015",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Until recently, numerous feature selection techniques have been proposed and found wide applications in genomics and proteomics. For instance, feature/gene selection has proven to be useful for biomarker discovery from microarray and mass spectrometry data.
While supervised feature selection has been explored extensively, there are only a few unsupervised methods that can be applied to exploratory data analysis. In this paper, we address the problem of unsupervised feature selection. First, we extend Laplacian linear discriminant analysis (LLDA) to unsupervised cases. Second, we propose a novel algorithm for computing LLDA, which is efficient in the case of high dimensionality and small sample size as in microarray data. Finally, an unsupervised feature selection method, called LLDA-based Recursive Feature Elimination (LLDA-RFE), is proposed. We apply LLDA-RFE to several public data sets of cancer microarrays and compare its performance with those of Laplacian score and SVD-entropy, two state-of-the-art unsupervised methods, and with that of Fisher score, a supervised filter method. Our results demonstrate that LLDA-RFE outperforms Laplacian score and shows favorable performance against SVD-entropy. It performs even better than Fisher score for some of the data sets, despite the fact that LLDA-RFE is fully unsupervised.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Rasmussen:2009:MVU,
  author =       "Carl Rasmussen and Bernard de la Cruz and Zoubin Ghahramani and David Wild",
  title =        "Modeling and Visualizing Uncertainty in Gene Expression Clusters Using {Dirichlet} Process Mixtures",
  journal =      j-TCBB,
  volume =       "6",
  number =       "4",
  pages =        "615--628",
  month =        oct,
  year =         "2009",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2007.70269",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Oct 1 16:16:42 MDT 2015",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Although the use of clustering methods has rapidly become one of the standard computational approaches in the literature of microarray gene
expression data, little attention has been paid to uncertainty in the results obtained. Dirichlet process mixture (DPM) models provide a nonparametric Bayesian alternative to the bootstrap approach to modeling uncertainty in gene expression clustering. Most previously published applications of Bayesian model-based clustering methods have been to short time series data. In this paper, we present a case study of the application of nonparametric Bayesian clustering methods to the clustering of high-dimensional nontime series gene expression data using full Gaussian covariances. We use the probability that two genes belong to the same cluster in a DPM model as a measure of the similarity of these gene expression profiles. Conversely, this probability can be used to define a dissimilarity measure, which, for the purposes of visualization, can be input to one of the standard linkage algorithms used for hierarchical clustering. Biologically plausible results are obtained from the Rosetta compendium of expression profiles which extend previously published cluster analyses of this data.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Cardona:2009:NMR, author = "Gabriel Cardona and Merce Llabres and Francesc Rossello and Gabriel Valiente", title = "On {Nakhleh}'s Metric for Reduced Phylogenetic Networks", journal = j-TCBB, volume = "6", number = "4", pages = "629--638", month = oct, year = "2009", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.33", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Oct 1 16:16:42 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We prove that Nakhleh's metric for reduced phylogenetic networks is also a metric on the classes of tree-child phylogenetic networks, semibinary tree-sibling time consistent 
phylogenetic networks, and multilabeled phylogenetic trees. We also prove that it separates distinguishable phylogenetic networks. In this way, it becomes the strongest dissimilarity measure for phylogenetic networks available so far. Furthermore, we propose a generalization of that metric that separates arbitrary phylogenetic networks.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Chowriappa:2009:PSC, author = "Pradeep Chowriappa and Sumeet Dua and Jinko Kanno and Hilary W. Thompson", title = "Protein Structure Classification Based on Conserved Hydrophobic Residues", journal = j-TCBB, volume = "6", number = "4", pages = "639--651", month = oct, year = "2009", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.77", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Oct 1 16:16:42 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Protein folding is frequently guided by local residue interactions that form clusters in the protein core. The interactions between residue clusters serve as potential nucleation sites in the folding process. Evidence postulates that the residue interactions are governed by the hydrophobic propensities that the residues possess. An array of hydrophobicity scales has been developed to determine the hydrophobic propensities of residues under different environmental conditions. In this work, we propose a graph-theory-based data mining framework to extract and isolate protein structural features that sustain invariance in evolutionary-related proteins, through the integrated analysis of five well-known hydrophobicity scales over the 3D structure of proteins. 
We hypothesize that proteins of the same homology contain conserved hydrophobic residues and exhibit analogous residue interaction patterns in the folded state. The results obtained demonstrate that discriminatory residue interaction patterns shared among proteins of the same family can be employed for both the structural and the functional annotation of proteins. We obtained on the average 90 percent accuracy in protein classification with a significantly small feature vector compared to previous results in the area. This work presents an elaborate study, as well as validation evidence, to illustrate the efficacy of the method and the correctness of results reported.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Uehara:2009:PDC, author = "Hiroaki Uehara and Masakazu Jimbo", title = "A Positive Detecting Code and Its Decoding Algorithm for {DNA} Library Screening", journal = j-TCBB, volume = "6", number = "4", pages = "652--666", month = oct, year = "2009", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2007.70266", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Oct 1 16:16:42 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The study of gene functions requires high-quality DNA libraries. However, a large number of tests and screenings are necessary for compiling such libraries. We describe an algorithm for extracting as much information as possible from pooling experiments for library screening. Collections of clones are called pools, and a pooling experiment is a group test for detecting all positive clones. The probability of positiveness for each clone is estimated according to the outcomes of the pooling experiments. Clones with high chance of positiveness are subjected to confirmatory testing. 
In this paper, we introduce a new positive clone detecting algorithm, called the Bayesian network pool result decoder (BNPD). The performance of BNPD is compared, by simulation, with that of the Markov chain pool result decoder (MCPD) proposed by Knill et al. in 1996. Moreover, the combinatorial properties of pooling designs suitable for the proposed algorithm are discussed in conjunction with combinatorial designs and $d$-disjunct matrices. We also show the advantage of utilizing packing designs or BIB designs for the BNPD algorithm.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{vanIersel:2009:CLP, author = "Leo van Iersel and Judith Keijsper and Steven Kelk and Leen Stougie and Ferry Hagen and Teun Boekhout", title = "Constructing Level-2 Phylogenetic Networks from Triplets", journal = j-TCBB, volume = "6", number = "4", pages = "667--681", month = oct, year = "2009", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.22", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Oct 1 16:16:42 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Jansson and Sung showed that, given a dense set of input triplets T (representing hypotheses about the local evolutionary relationships of triplets of taxa), it is possible to determine in polynomial time whether there exists a level-1 network consistent with T, and if so, to construct such a network [24]. Here, we extend this work by showing that this problem is even polynomial time solvable for the construction of level-2 networks. This shows that, assuming density, it is tractable to construct plausible evolutionary histories from input triplets even when such histories are heavily nontree-like.
This further strengthens the case for the use of triplet-based methods in the construction of phylogenetic networks. We also implemented the algorithm and applied it to yeast data.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Mneimneh:2009:AOS, author = "Saad Mneimneh", title = "On the Approximation of Optimal Structures for {RNA--RNA} Interaction", journal = j-TCBB, volume = "6", number = "4", pages = "682--688", month = oct, year = "2009", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2007.70258", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Oct 1 16:16:42 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The interaction of two RNA molecules is a common mechanism for many biological processes. Small interfering RNAs represent a simple example of such an interaction. But other more elaborate instances of RNA-RNA interaction exist. Therefore, algorithms that predict the structure of the RNA complex thus formed are of great interest. Most of the proposed algorithms are based on dynamic programming. RNA-RNA interaction is generally NP-complete; therefore, these algorithms (and other polynomial time algorithms for that matter) are not expected to produce optimal structures. Our goal is to characterize this suboptimality. We demonstrate the existence of constant factor approximation algorithms that are based on dynamic programming. In particular, we describe 1/2 and 2/3 factor approximation algorithms. 
We define an entangler and prove that 2/3 is a theoretical upper bound on the approximation factor of algorithms that produce entangler-free solutions, e.g., the mentioned dynamic programming algorithms.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Diago:2009:EGC, author = "Luis A. Diago and Ernesto Moreno", title = "Evaluation of Geometric Complementarity between Molecular Surfaces Using Compactly Supported Radial Basis Functions", journal = j-TCBB, volume = "6", number = "4", pages = "689--694", month = oct, year = "2009", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.31", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Oct 1 16:16:42 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "One of the challenges faced by all molecular docking algorithms is that of being able to discriminate between correct results and false positives obtained in the simulations. The scoring or energetic function is the one that must fulfill this task. Several scoring functions have been developed and new methodologies are still under development. In this paper, we have employed the Compactly Supported Radial Basis Functions (CSRBF) to create analytical representations of molecular surfaces, which are then included as key components of a new scoring function for molecular docking. The method proposed here achieves a better ranking of the solutions produced by the program DOCK, as compared with the ranking done by its native contact scoring function. 
Our new analytical scoring function based on CSRBF can be easily included in different available docking programs as a reliable and quick filter in large-scale docking simulations.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Gonzalez:2009:MLT, author = "Ana M. Gonzalez and Francisco J. Azuaje and Jose L. Ramirez and Jose F. da Silveira and Jose R. Dorronsoro", title = "Machine Learning Techniques for the Automated Classification of Adhesin-Like Proteins in the Human Protozoan Parasite \bioname{Trypanosoma cruzi}", journal = j-TCBB, volume = "6", number = "4", pages = "695--702", month = oct, year = "2009", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.125", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Oct 1 16:16:42 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This paper reports on the evaluation of different machine learning techniques for the automated classification of coding gene sequences obtained from several organisms in terms of their functional role as adhesins. Diverse, biologically-meaningful, sequence-based features were extracted from the sequences and used as inputs to the in silico prediction models. Another contribution of this work is the generation of potentially novel and testable predictions about the surface protein DGF-1 family in Trypanosoma cruzi. Finally, these techniques are potentially useful for the automated annotation of known adhesin-like proteins from the trans-sialidase surface protein family in T. 
cruzi, the etiological agent of Chagas disease.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Anonymous:2009:CPS, author = "Anonymous", title = "Call for Papers: Special Issue of Transactions in Computational Biology and Bioinformatics: Special Issue on {BioCreative II.5}", journal = j-TCBB, volume = "6", number = "4", pages = "703", month = oct, year = "2009", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.73", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Oct 1 16:16:42 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Anonymous:2009:TAI, author = "Anonymous", title = "2009 {TCBB} Annual Index", journal = j-TCBB, volume = "6", number = "4", pages = "Not in Print", month = oct, year = "2009", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.72", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Oct 1 16:16:42 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Anonymous:2010:EN, author = "Anonymous", title = "{Editor}'s Note", journal = j-TCBB, volume = "7", number = "1", pages = "1--1", month = jan, year = "2010", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 15 18:56:53 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM 
Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Diaz:2010:ADL, author = "Ester Diaz and Guillermo Ayala and Maria Diaz-Fernandez and Liang Gong and Derek Toomre", title = "Automatic Detection of Large Dense-Core Vesicles in Secretory Cells and Statistical Analysis of Their Intracellular Distribution", journal = j-TCBB, volume = "7", number = "1", pages = "2--11", month = jan, year = "2010", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 15 18:56:53 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Lushbough:2010:BSI, author = "Carol Lushbough and Michael K. Bergman and Carolyn J. Lawrence and Doug Jennewein and Volker Brendel", title = "{BioExtract Server} --- an Integrated Workflow-Enabling System to Access and Analyze Heterogeneous, Distributed Biomolecular Data", journal = j-TCBB, volume = "7", number = "1", pages = "12--24", month = jan, year = "2010", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 15 18:56:53 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhu:2010:FSG, author = "Shenghuo Zhu and Dingding Wang and Kai Yu and Tao Li and Yihong Gong", title = "Feature Selection for Gene Expression Using Model-Based Entropy", journal = j-TCBB, volume = "7", number = "1", pages = "25--36", month = jan, year = "2010", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", 
bibdate = "Mon Mar 15 18:56:53 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Pehkonen:2010:HBS, author = "Petri Pehkonen and Garry Wong and Petri Toronen", title = "Heuristic {Bayesian} Segmentation for Discovery of Coexpressed Genes within Genomic Regions", journal = j-TCBB, volume = "7", number = "1", pages = "37--49", month = jan, year = "2010", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 15 18:56:53 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Kustra:2010:DFC, author = "Rafal Kustra and Adam Zagdanski", title = "Data-Fusion in Clustering Microarray Data: Balancing Discovery and Interpretability", journal = j-TCBB, volume = "7", number = "1", pages = "50--63", month = jan, year = "2010", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 15 18:56:53 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Rubel:2010:IDC, author = "Oliver Rubel and Gunther H. Weber and Min-Yu Huang and E. Wes Bethel and Mark D. Biggin and Charless C. Fowlkes and Cris L. {Luengo Hendriks} and Soile V. E. Keranen and Michael B. Eisen and David W.
Knowles and Jitendra Malik and Hans Hagen and Bernd Hamann", title = "Integrating Data Clustering and Visualization for the Analysis of {$3$D} Gene Expression Data", journal = j-TCBB, volume = "7", number = "1", pages = "64--79", month = jan, year = "2010", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 15 18:56:53 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Han:2010:MPC, author = "Ju Han and Hang Chang and Kumari Andarawewa and Paul Yaswen and Mary Helen Barcellos-Hoff and Bahram Parvin", title = "Multidimensional Profiling of Cell Surface Proteins and Nuclear Markers", journal = j-TCBB, volume = "7", number = "1", pages = "80--90", month = jan, year = "2010", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 15 18:56:53 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Done:2010:PNH, author = "Bogdan Done and Purvesh Khatri and Arina Done and Sorin Draghici", title = "Predicting Novel Human Gene Ontology Annotations Using Semantic Analysis", journal = j-TCBB, volume = "7", number = "1", pages = "91--99", month = jan, year = "2010", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 15 18:56:53 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = 
"http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Liu:2010:SSV, author = "Zhenqiu Liu and Shili Lin and Ming Tan", title = "Sparse Support Vector Machines with {$ L_p $} Penalty for Biomarker Identification", journal = j-TCBB, volume = "7", number = "1", pages = "100--107", month = jan, year = "2010", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 15 18:56:53 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Leung:2010:MFM, author = "Yukyee Leung and Yeungsam Hung", title = "A Multiple-Filter-Multiple-Wrapper Approach to Gene Selection and Microarray Data Classification", journal = j-TCBB, volume = "7", number = "1", pages = "108--117", month = jan, year = "2010", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 15 18:56:53 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Perkins:2010:TBS, author = "Theodore J. Perkins and Michael T. 
Hallett", title = "A Trade-Off between Sample Complexity and Computational Complexity in Learning {Boolean} Networks from Time-Series Data", journal = j-TCBB, volume = "7", number = "1", pages = "118--125", month = jan, year = "2010", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 15 18:56:53 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Pelikan:2010:EPL, author = "Richard Pelikan and Milos Hauskrecht", title = "Efficient Peak-Labeling Algorithms for Whole-Sample Mass Spectrometry Proteomics", journal = j-TCBB, volume = "7", number = "1", pages = "126--137", month = jan, year = "2010", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 15 18:56:53 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Mahata:2010:ECH, author = "Pritha Mahata", title = "Exploratory Consensus of Hierarchical Clusterings for Melanoma and Breast Cancer", journal = j-TCBB, volume = "7", number = "1", pages = "138--152", month = jan, year = "2010", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 15 18:56:53 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Madeira:2010:IRM, author = "Sara C. Madeira and Miguel C. 
Teixeira and Isabel Sa-Correia and Arlindo L. Oliveira", title = "Identification of Regulatory Modules in Time Series Gene Expression Data Using a Linear Time Biclustering Algorithm", journal = j-TCBB, volume = "7", number = "1", pages = "153--165", month = jan, year = "2010", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 15 18:56:53 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Mossel:2010:ILS, author = "Elchanan Mossel and Sebastien Roch", title = "Incomplete Lineage Sorting: Consistent Phylogeny Estimation from Multiple Loci", journal = j-TCBB, volume = "7", number = "1", pages = "166--171", month = jan, year = "2010", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 15 18:56:53 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Freitas:2010:ICC, author = "Alex A. Freitas and Daniela C. 
Wieser and Rolf Apweiler", title = "On the Importance of Comprehensible Classification Models for Protein Function Prediction", journal = j-TCBB, volume = "7", number = "1", pages = "172--182", month = jan, year = "2010", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 15 18:56:53 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Alon:2010:AMP, author = "Noga Alon and Benny Chor and Fabio Pardi and Anat Rapoport", title = "Approximate Maximum Parsimony and Ancestral Maximum Likelihood", journal = j-TCBB, volume = "7", number = "1", pages = "183--187", month = jan, year = "2010", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 15 18:56:53 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Anonymous:2010:RL, author = "Anonymous", title = "2009 Reviewer's List", journal = j-TCBB, volume = "7", number = "1", pages = "188--190", month = jan, year = "2010", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 15 18:56:53 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Sagot:2010:EE, author = "Marie-France Sagot", title = "{EIC} Editorial", journal = j-TCBB, volume = "7", number = "2", pages = "193--194", month = apr, 
year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.29", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 7 16:01:51 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Lonardi:2010:DMB, author = "Stefano Lonardi and Jake Chen", title = "Data Mining in Bioinformatics: Selected Papers from {BIOKDD}", journal = j-TCBB, volume = "7", number = "2", pages = "195--196", month = apr, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.28", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 7 16:01:51 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Smalter:2010:GGP, author = "Aaron Smalter and Jun Huan and Yi Jia and Gerald Lushington", title = "{GPD}: a Graph Pattern Diffusion Kernel for Accurate Graph Classification with Applications in Cheminformatics", journal = j-TCBB, volume = "7", number = "2", pages = "197--207", month = apr, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.80", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 7 16:01:51 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Graph data mining is an active research area. Graphs are general modeling tools to organize information from heterogeneous sources and have been applied in many scientific, engineering, and business fields. 
With the fast accumulation of graph data, building highly accurate predictive models for graph data emerges as a new challenge that has not been fully explored in the data mining community. In this paper, we demonstrate a novel technique called graph pattern diffusion (GPD) kernel. Our idea is to leverage existing frequent pattern discovery methods and to explore the application of kernel classifier (e.g., support vector machine) in building highly accurate graph classification. In our method, we first identify all frequent patterns from a graph database. We then map subgraphs to graphs in the graph database and use a process we call ``pattern diffusion'' to label nodes in the graphs. Finally, we designed a graph alignment algorithm to compute the inner product of two graphs. We have tested our algorithm using a number of chemical structure data. The experimental results demonstrate that our method is significantly better than competing methods such as those kernel functions based on paths, cycles, and subgraphs.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "frequent subgraph mining; graph alignment; Graph classification", } @Article{Bogdanov:2010:MFP, author = "Petko Bogdanov and Ambuj K. Singh", title = "Molecular Function Prediction Using Neighborhood Features", journal = j-TCBB, volume = "7", number = "2", pages = "208--217", month = apr, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.81", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 7 16:01:51 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The recent advent of high-throughput methods has generated large amounts of gene interaction data. This has allowed the construction of genomewide networks.
A significant number of genes in such networks remain uncharacterized and predicting the molecular function of these genes remains a major challenge. A number of existing techniques assume that genes with similar functions are topologically close in the network. Our hypothesis is that genes with similar functions observe similar annotation patterns in their neighborhood, regardless of the distance between them in the interaction network. We thus predict molecular functions of uncharacterized genes by comparing their functional neighborhoods to genes of known function. We propose a two-phase approach. First, we extract functional neighborhood features of a gene using Random Walks with Restarts. We then employ a KNN classifier to predict the function of uncharacterized genes based on the computed neighborhood features. We perform leave-one-out validation experiments on two \bioname{S. cerevisiae} interaction networks and show significant improvements over previous techniques. Our technique provides a natural control of the trade-off between accuracy and coverage of prediction.
We further propose and evaluate prediction in sparse genomes by exploiting features from well-annotated genomes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "classification; feature extraction; functional interaction network.; Gene function prediction", } @Article{Nakhleh:2010:MSR, author = "Luay Nakhleh", title = "A Metric on the Space of Reduced Phylogenetic Networks", journal = j-TCBB, volume = "7", number = "2", pages = "218--222", month = apr, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.2", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 7 16:01:51 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Phylogenetic networks are leaf-labeled, rooted, acyclic, and directed graphs that are used to model reticulate evolutionary histories. Several measures for quantifying the topological dissimilarity between two phylogenetic networks have been devised, each of which was proven to be a metric on certain restricted classes of phylogenetic networks. A biologically motivated class of phylogenetic networks, namely, reduced phylogenetic networks, was recently introduced. None of the existing measures is a metric on the space of reduced phylogenetic networks. 
In this paper, we provide a metric on the space of reduced phylogenetic networks that is computable in time polynomial in the size of the networks.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "indistinguishability; metric.; phylogenetic network; Phylogeny; reduced phylogenetic network", } @Article{Gupta:2010:AHD, author = "Gunjan Gupta and Alexander Liu and Joydeep Ghosh", title = "Automated Hierarchical Density Shaving: a Robust Automated Clustering and Visualization Framework for Large Biological Data Sets", journal = j-TCBB, volume = "7", number = "2", pages = "223--237", month = apr, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.32", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 7 16:01:51 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A key application of clustering data obtained from sources such as microarrays, protein mass spectroscopy, and phylogenetic profiles is the detection of functionally related genes. Typically, only a small number of functionally related genes cluster into one or more groups, and the rest need to be ignored. For such situations, we present Automated Hierarchical Density Shaving (Auto-HDS), a framework that consists of a fast hierarchical density-based clustering algorithm and an unsupervised model selection strategy. Auto-HDS can automatically select clusters of different densities, present them in a compact hierarchy, and rank individual clusters using an innovative stability criteria. Our framework also provides a simple yet powerful 2D visualization of the hierarchy of clusters that is useful for further interactive exploration. We present results on Gasch and Lee microarray data sets to show the effectiveness of our methods. 
Additional results on other biological data are included in the supplemental material.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "bioinformatics.; clustering; data and knowledge visualization; Mining methods and algorithms", } @Article{Raiford:2010:AIT, author = "Douglas W. Raiford and Dan E. Krane and Travis E. Doom and Michael L. Raymer", title = "Automated Isolation of Translational Efficiency Bias That Resists the Confounding Effect of {GC(AT)}-Content", journal = j-TCBB, volume = "7", number = "2", pages = "238--250", month = apr, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.65", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 7 16:01:51 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Genomic sequencing projects are an abundant source of information for biological studies ranging from the molecular to the ecological in scale; however, much of the information present may yet be hidden from casual analysis. One such information domain, trends in codon usage, can provide a wealth of information about an organism's genes and their expression. Degeneracy in the genetic code allows more than one triplet codon to code for the same amino acid, and usage of these codons is often biased such that one or more of these synonymous codons are preferred. Detection of this bias is an important tool in the analysis of genomic data, particularly as a predictor of gene expressivity. Methods for identifying codon usage bias in genomic data that rely solely on genomic sequence data are susceptible to being confounded by the presence of several factors simultaneously influencing codon selection. 
Presented here is a new technique for removing the effects of one of the more common confounding factors, GC(AT)-content, and of visualizing the search-space for codon usage bias through the use of a solution landscape. This technique successfully isolates expressivity-related codon usage trends, using only genomic sequence information, where other techniques fail due to the presence of GC(AT)-content confounding influences.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "Codon usage bias; GC-content; strand bias; translational efficiency.", } @Article{Tenenhaus:2010:GAN, author = "Arthur Tenenhaus and Vincent Guillemot and Xavier Gidrol and Vincent Frouin", title = "Gene Association Networks from Microarray Data Using a Regularized Estimation of Partial Correlation Based on {PLS} Regression", journal = j-TCBB, volume = "7", number = "2", pages = "251--262", month = apr, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.87", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 7 16:01:51 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Reconstruction of gene-gene interactions from large-scale data such as microarrays is a first step toward better understanding the mechanisms at work in the cell. Two main issues have to be managed in such a context: (1) choosing which measures have to be used to distinguish between direct and indirect interactions from high-dimensional microarray data and (2) constructing networks with a low proportion of false-positive edges. We present an efficient methodology for the reconstruction of gene interaction networks in a small-sample-size setting. 
The strength of independence of any two genes is measured, in such `high-dimensional network,' by a regularized estimation of partial correlation based on Partial Least Squares Regression. We finally emphasize specific properties of the proposed method. To assess the sensitivity and specificity of the method, we carried out the reconstruction of networks from simulated data. We also tested PLS-based partial correlation network on static and dynamic real microarray data. An R implementation of the proposed algorithm is available from \path=http://biodev.extra.cea.fr/plspcnetwork/=.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "Gene association networks; high-dimensional data; local false discovery rate.; partial correlation; Partial Least Squares Regression", } @Article{Zhu:2010:IFP, author = "Zexuan Zhu and Yew-Soon Ong and Jacek M. Zurada", title = "Identification of Full and Partial Class Relevant Genes", journal = j-TCBB, volume = "7", number = "2", pages = "263--277", month = apr, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.105", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 7 16:01:51 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Multiclass cancer classification on microarray data has provided the feasibility of cancer diagnosis across all of the common malignancies in parallel. Using multiclass cancer feature selection approaches, it is now possible to identify genes relevant to a set of cancer types. However, besides identifying the relevant genes for the set of all cancer types, it is deemed to be more informative to biologists if the relevance of each gene to specific cancer or subset of cancer types could be revealed or pinpointed. 
In this paper, we introduce two new definitions of multiclass relevancy features, i.e., full class relevant (FCR) and partial class relevant (PCR) features. Particularly, FCR denotes genes that serve as candidate biomarkers for discriminating all cancer types. PCR, on the other hand, are genes that distinguish subsets of cancer types. Subsequently, a Markov blanket embedded memetic algorithm is proposed for the simultaneous identification of both FCR and PCR genes. Results obtained on commonly used synthetic and real-world microarray data sets show that the proposed approach converges to valid FCR and PCR genes that would assist biologists in their research work. The identification of both FCR and PCR genes is found to generate improvement in classification accuracy on many microarray data sets. Further comparison study to existing state-of-the-art feature selection algorithms also reveals the effectiveness and efficiency of the proposed approach.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "Bioinformatics; feature/gene selection; Markov blanket.; memetic algorithm; microarray; multiclass cancer classification", } @Article{Randhawa:2010:MCM, author = "Ranjit Randhawa and Cliff Shaffer and John Tyson", title = "Model Composition for Macromolecular Regulatory Networks", journal = j-TCBB, volume = "7", number = "2", pages = "278--287", month = apr, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.64", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 7 16:01:51 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Models of regulatory networks become more difficult to construct and understand as they grow in size and complexity. 
Large models are usually built up from smaller models, representing subsets of reactions within the larger network. To assist modelers in this composition process, we present a formal approach for model composition, a wizard-style program for implementing the approach, and suggested language extensions to the Systems Biology Markup Language to support model composition. To illustrate the features of our approach and how to use the JigCell Composition Wizard, we build up a model of the eukaryotic cell cycle ``engine'' from smaller pieces.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "composition; flattening; fusion; Modeling; SBML.", } @Article{Bokhari:2010:RNI, author = "Shahid H. Bokhari and Daniel Janies", title = "Reassortment Networks for Investigating the Evolution of Segmented Viruses", journal = j-TCBB, volume = "7", number = "2", pages = "288--298", month = apr, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.73", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 7 16:01:51 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Many viruses of interest, such as influenza A, have distinct segments in their genome. The evolution of these viruses involves mutation and reassortment, where segments are interchanged between viruses that coinfect a host. Phylogenetic trees can be constructed to investigate the mutation-driven evolution of individual viral segments. However, reassortment events among viral genomes are not well depicted in such bifurcating trees. We propose the concept of reassortment networks to analyze the evolution of segmented viruses. These are layered graphs in which the layers represent evolutionary stages such as a temporal series of seasons in which influenza viruses are isolated. 
Nodes represent viral isolates and reassortment events between pairs of isolates. Edges represent evolutionary steps, while weights on edges represent edit costs of reassortment and mutation events. Paths represent possible transformation series among viruses. The length of each path is the sum edit cost of the events required to transform one virus into another. In order to analyze $ \tau $ stages of evolution of $n$ viruses with segments of maximum length $m$, we first compute the pairwise distances between all corresponding segments of all viruses in $ {\cal O}(m^2 n^2) $ time using dynamic programming. The reassortment network, with $ {\cal O}(\tau n^2) $ nodes, is then constructed using these distances. The ancestors and descendents of a specific virus can be traced via shortest paths in this network, which can be found in $ {\cal O}(\tau n^3) $ time.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "dynamic programming; Influenza A; reassortment; segmented virus; shortest paths.", } @Article{Bergemann:2010:SQM, author = "Tracy L. Bergemann and Lue Ping Zhao", title = "Signal Quality Measurements for {cDNA} Microarray Data", journal = j-TCBB, volume = "7", number = "2", pages = "299--308", month = apr, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.72", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 7 16:01:51 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Concerns about the reliability of expression data from microarrays inspire ongoing research into measurement error in these experiments. Error arises at both the technical level within the laboratory and the experimental level. In this paper, we will focus on estimating the spot-specific error, as there are few currently available models. 
This paper outlines two different approaches to quantify the reliability of spot-specific intensity estimates. In both cases, the spatial correlation between pixels and its impact on spot quality is accounted for. The first method is a straightforward parametric estimate of within-spot variance that assumes a Gaussian distribution and accounts for spatial correlation via an overdispersion factor. The second method employs a nonparametric quality estimate referred to throughout as the mean square prediction error (MSPE). The MSPE first smoothes a pixel region and then measures the difference between actual pixel values and the smoother. Both methods herein are compared for real and simulated data to assess numerical characteristics and the ability to describe poor spot quality. We conclude that both approaches capture noise in the microarray platform and highlight situations where one method or the other is superior.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "image analysis.; Microarray; prediction error; signal quality", } @Article{Blin:2010:ARS, author = "Guillaume Blin and Alain Denise and Serge Dulucq and Claire Herrbach and H{\'e}l{\`e}ne Touzet", title = "Alignments of {RNA} Structures", journal = j-TCBB, volume = "7", number = "2", pages = "309--322", month = apr, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.28", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 7 16:01:51 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We describe a theoretical unifying framework to express the comparison of RNA structures, which we call alignment hierarchy. 
This framework relies on the definition of common supersequences for arc-annotated sequences and encompasses the main existing models for RNA structure comparison based on trees and arc-annotated sequences with a variety of edit operations. It also gives rise to edit models that have not been studied yet. We provide a thorough analysis of the alignment hierarchy, including a new polynomial-time algorithm and an NP-completeness proof. The polynomial-time algorithm involves biologically relevant edit operations such as pairing or unpairing nucleotides. It has been implemented in a software, called {\tt gardenia}, which is available at the Web server \path=http://bioinfo.lifl.fr/RNA/gardenia=.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "algorithm.; arc-annotated sequences; Computational biology; edit distance; NP-hardness; RNA structures", } @Article{Jiang:2010:AAP, author = "Minghui Jiang", title = "Approximation Algorithms for Predicting {RNA} Secondary Structures with Arbitrary Pseudoknots", journal = j-TCBB, volume = "7", number = "2", pages = "323--332", month = apr, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.109", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 7 16:01:51 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We study three closely related problems motivated by the prediction of RNA secondary structures with arbitrary pseudoknots: the problem 2-Interval Pattern proposed by Vialette, the problem Maximum Base Pair Stackings proposed by Leong et al., and the problem Maximum Stacking Base Pairs proposed by Lyngs{\o}. 
For the 2-Interval Pattern, we present polynomial-time approximation algorithms for the problem over the preceding-and-crossing model and on input with the unitary restriction. For Maximum Base Pair Stackings and Maximum Stacking Base Pairs, we present polynomial-time approximation algorithms for the two problems on explicit input of candidate base pairs. We also propose a new problem called Length-Weighted Balanced 2-Interval Pattern, which is natural in the context of RNA secondary structure prediction.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "2-intervals.; RNA secondary structure prediction; stacking pairs", } @Article{Shibuya:2010:FHD, author = "Tetsuo Shibuya", title = "Fast Hinge Detection Algorithms for Flexible Protein Structures", journal = j-TCBB, volume = "7", number = "2", pages = "333--341", month = apr, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.62", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 7 16:01:51 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Analysis of conformational changes is one of the keys to the understanding of protein functions and interactions. For the analysis, we often compare two protein structures, taking flexible regions like hinge regions into consideration. The Root Mean Square Deviation (RMSD) is the most popular measure for comparing two protein structures, but it is only for rigid structures without hinge regions. In this paper, we propose a new measure called RMSD considering hinges (RMSDh) and its variant {\rm RMSDh}$^{(k)}$ for comparing two flexible proteins with hinge regions. We also propose novel efficient algorithms for computing them, which can detect the hinge positions at the same time. 
The RMSDh is suitable for cases where there is one small hinge region in each of the two target structures. The new algorithm for computing the RMSDh runs in linear time, which is the same as the time complexity for computing the RMSD and is faster than any of previous algorithms for hinge detection. The {\rm RMSDh}$^{(k)}$ is designed for comparing structures with more than one hinge region. The {\rm RMSDh}$^{(k)}$ measure considers at most $k$ small hinge region, i.e., the {\rm RMSDh}$^{(k)}$ value should be small if the two structures are similar except for at most $k$ hinge regions. To compute the value, we propose an $ O(k n^2) $-time and $ O(n) $-space algorithm based on a new dynamic programming technique. With the same computational time and space, we can enumerate the predicted hinge positions. We also test our algorithms against actual flexible protein structures, and show that the hinge positions can be correctly detected by our algorithms.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "Algorithm; dynamic programming.; protein 3D structure comparison; protein hinge detection", } @Article{Guillemot:2010:FPT, author = "Sylvain Guillemot and Vincent Berry", title = "Fixed-Parameter Tractability of the Maximum Agreement Supertree Problem", journal = j-TCBB, volume = "7", number = "2", pages = "342--353", month = apr, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.93", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 7 16:01:51 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Given a set $L$ of labels and a collection of rooted trees whose leaves are bijectively labeled by some elements of $L$, the Maximum Agreement Supertree (SMAST) problem is given as follows: find a tree $T$ on a largest label 
set $ L' \subseteq L $ that homeomorphically contains every input tree restricted to $ L' $. The problem has phylogenetic applications to infer supertrees and perform tree congruence analyses. In this paper, we focus on the parameterized complexity of this NP-hard problem, considering different combinations of parameters as well as particular cases. We show that SMAST on $k$ rooted binary trees on a label set of size $n$ can be solved in $ O((8 n)^k) $ time, which is an improvement with respect to the previously known $ O(n^{3k^2}) $ time algorithm. In this case, we also give an $ O((2 k)^p k n^2) $ time algorithm, where $p$ is an upper bound on the number of leaves of $L$ missing in a SMAST solution. This shows that SMAST can be solved efficiently when the input trees are mostly congruent. Then, for the particular case where any triple of leaves is contained in at least one input tree, we give $ O(4^p n^3) $ and $ O(3.12^p + n^4) $ time algorithms, obtaining the first fixed-parameter tractable algorithms on a single parameter for this problem. 
We also obtain intractability results for several combinations of parameters, thus indicating that it is unlikely that fixed-parameter tractable algorithms can be found in these particular cases.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "algorithms; maximum agreement supertree; parameterized complexity; Phylogenetics; reductions; rooted triples.", } @Article{Liu:2010:MPI, author = "Xiaowen Liu and Jinyan Li and Lusheng Wang", title = "Modeling Protein Interacting Groups by Quasi-Bicliques: Complexity, Algorithm, and Application", journal = j-TCBB, volume = "7", number = "2", pages = "354--364", month = apr, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.61", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 7 16:01:51 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Protein-protein interactions (PPIs) are one of the most important mechanisms in cellular processes. To model protein interaction sites, recent studies have suggested to find interacting protein group pairs from large PPI networks at the first step and then to search conserved motifs within the protein groups to form interacting motif pairs. To consider the noise effect and the incompleteness of biological data, we propose to use quasi-bicliques for finding interacting protein group pairs. We investigate two new problems that arise from finding interacting protein group pairs: the maximum vertex quasi-biclique problem and the maximum balanced quasi-biclique problem. We prove that both problems are NP-hard. This is a surprising result as the widely known maximum vertex biclique problem is polynomial time solvable [1]. We then propose a heuristic algorithm that uses the greedy method to find the quasi-bicliques from PPI networks. 
Our experiment results on real data show that this algorithm has a better performance than a benchmark algorithm for identifying highly matched BLOCKS and PRINTS motifs. We also report results of two case studies on interacting motif pairs that map well with two interacting domain pairs in iPfam. Availability: The software and supplementary information are available at \path=http://www.cs.cityu.edu.hk/~lwang/software/ppi/index.html=.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "interaction sites; Protein-protein interactions; quasi-bicliques.", } @Article{Qi:2010:SGR, author = "Xingqin Qi and Guojun Li and Shuguang Li and Ying Xu", title = "Sorting Genomes by Reciprocal Translocations, Insertions, and Deletions", journal = j-TCBB, volume = "7", number = "2", pages = "365--374", month = apr, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.53", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 7 16:01:51 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The problem of sorting by reciprocal translocations (abbreviated as SBT) arises from the field of comparative genomics, which is to find a shortest sequence of reciprocal translocations that transforms one genome $ \Pi $ into another genome $ \Gamma $, with the restriction that $ \Pi $ and $ \Gamma $ contain the same genes. SBT has been proved to be polynomial-time solvable, and several polynomial algorithms have been developed. In this paper, we show how to extend Bergeron's SBT algorithm to include insertions and deletions, allowing to compare genomes containing different genes. 
In particular, if the gene set of $ \Pi $ is a subset (or superset, respectively) of the gene set of $ \Gamma $, we present an approximation algorithm for transforming $ \Pi $ into $ \Gamma $ by reciprocal translocations and deletions (insertions, respectively), providing a sorting sequence with length at most OPT + 2, where OPT is the minimum number of translocations and deletions (insertions, respectively) needed to transform $ \Pi $ into $ \Gamma $; if $ \Pi $ and $ \Gamma $ have different genes but not containing each other, we give a heuristic to transform $ \Pi $ into $ \Gamma $ by a shortest sequence of reciprocal translocations, insertions, and deletions, with bounds for the length of the sorting sequence it outputs. At a conceptual level, there is some similarity between our algorithm and the algorithm developed by El Mabrouk which is used to sort two chromosomes with different gene contents by reversals, insertions, and deletions.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "algorithm.; deletion; insertion; Translocation", } @Article{Unger:2010:LSG, author = "Giora Unger and Benny Chor", title = "Linear Separability of Gene Expression Data Sets", journal = j-TCBB, volume = "7", number = "2", pages = "375--381", month = apr, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.90", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 7 16:01:51 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We study simple geometric properties of gene expression data sets, where samples are taken from two distinct classes (e.g., two types of cancer). Specifically, the problem of linear separability for pairs of genes is investigated. 
If a pair of genes exhibits linear separation with respect to the two classes, then the joint expression level of the two genes is strongly correlated to the phenomena of the sample being taken from one class or the other. This may indicate an underlying molecular mechanism relating the two genes and the phenomena (e.g., a specific cancer). We developed and implemented novel efficient algorithmic tools for finding all pairs of genes that induce a linear separation of the two sample classes. These tools are based on computational geometric properties and were applied to 10 publicly available cancer data sets. For each data set, we computed the number of actual separating pairs and compared it to an upper bound on the number expected by chance and to the numbers resulting from shuffling the labels of the data at random empirically. Seven out of these 10 data sets are highly separable. Statistically, this phenomenon is highly significant, very unlikely to occur at random. It is therefore reasonable to expect that it manifests a functional association between separating genes and the underlying phenotypic classes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", keywords = "diagnosis; DNA microarrays; Gene expression analysis; linear separation.", } @Article{Leitner:2010:OBI, author = "Florian Leitner and Scott A. Mardis and Martin Krallinger and Gianni Cesareni and Lynette A. 
Hirschman and Alfonso Valencia", title = "An Overview of {BioCreative II.5}", journal = j-TCBB, volume = "7", number = "3", pages = "385--399", month = jul, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.61", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 20 13:49:55 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Kolchinsky:2010:CPP, author = "Artemy Kolchinsky and Alaa Abi-Haidar and Jasleen Kaur and Ahmed Abdeen Hamed and Luis M. Rocha", title = "Classification of Protein-Protein Interaction Full-Text Documents Using Text and Citation Network Features", journal = j-TCBB, volume = "7", number = "3", pages = "400--411", month = jul, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.55", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 20 13:49:55 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Dai:2010:MGN, author = "Hong-Jie Dai and Po-Ting Lai and Richard Tzong-Han Tsai", title = "Multistage Gene Normalization and {SVM}-Based Ranking for Protein Interactor Extraction in Full-Text Articles", journal = j-TCBB, volume = "7", number = "3", pages = "412--420", month = jul, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.45", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 20 13:49:55 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, 
fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Lan:2010:EIF, author = "Man Lan and Jian Su", title = "Empirical Investigations into Full-Text Protein Interaction Article Categorization Task {(ACT)} in the {BioCreative II.5} Challenge", journal = j-TCBB, volume = "7", number = "3", pages = "421--427", month = jul, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.49", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 20 13:49:55 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Chen:2010:BSI, author = "Yifei Chen and Feng Liu and Bernard Manderick", title = "{BioLMiner} System: Interaction Normalization Task and Interaction Pair Task in the {BioCreative II.5} Challenge", journal = j-TCBB, volume = "7", number = "3", pages = "428--441", month = jul, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.47", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 20 13:49:55 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Saetre:2010:EPI, author = "Rune S{\ae}tre and Kazuhiro Yoshida and Makoto Miwa and Takuya Matsuzaki and Yoshinobu Kano and Jun'ichi Tsujii", title = "Extracting Protein Interactions from Text with the Unified {AkaneRE} Event Extraction System", journal = j-TCBB, volume = "7", number = "3", pages = "442--453", month = jul, year = "2010", CODEN = "ITCBCY", DOI = 
"https://doi.org/10.1109/TCBB.2010.46", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 20 13:49:55 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Cao:2010:IAM, author = "Yong-gang Cao and Zuofeng Li and Feifan Liu and Shashank Agarwal and Qing Zhang and Hong Yu", title = "An {IR}-Aided Machine Learning Framework for the {BioCreative II.5} Challenge", journal = j-TCBB, volume = "7", number = "3", pages = "454--461", month = jul, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.56", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 20 13:49:55 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Verspoor:2010:ESB, author = "Karin Verspoor and Christophe Roeder and Helen L. Johnson and Kevin Bretonnel Cohen and William A. {Baumgartner, Jr.} and Lawrence E. 
Hunter", title = "Exploring Species-Based Strategies for Gene Normalization", journal = j-TCBB, volume = "7", number = "3", pages = "462--471", month = jul, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.48", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 20 13:49:55 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Rinaldi:2010:OBI, author = "Fabio Rinaldi and Gerold Schneider and Kaarel Kaljurand and Simon Clematide and Th{\'e}r{\`e}se Vachon and Martin Romacker", title = "{OntoGene} in {BioCreative II.5}", journal = j-TCBB, volume = "7", number = "3", pages = "472--480", month = jul, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.50", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 20 13:49:55 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Hakenberg:2010:EEP, author = "J{\"o}rg Hakenberg and Robert Leaman and Nguyen Ha Vo and Siddhartha Jonnalagadda and Ryan Sullivan and Christopher Miller and Luis Tari and Chitta Baral and Graciela Gonzalez", title = "Efficient Extraction of Protein-Protein Interactions from Full-Text Articles", journal = j-TCBB, volume = "7", number = "3", pages = "481--494", month = jul, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.51", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 20 13:49:55 MDT 2010", bibsource = "http://portal.acm.org/; 
https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Chowdhury:2010:COD, author = "Rezaul Alan Chowdhury and Hai-Son Le and Vijaya Ramachandran", title = "Cache-Oblivious Dynamic Programming for Bioinformatics", journal = j-TCBB, volume = "7", number = "3", pages = "495--510", month = jul, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.94", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 20 13:49:55 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Tininini:2010:CHA, author = "Leonardo Tininini and Paola Bertolazzi and Alessandra Godi and Giuseppe Lancia", title = "{CollHaps}: a Heuristic Approach to Haplotype Inference by Parsimony", journal = j-TCBB, volume = "7", number = "3", pages = "511--523", month = jul, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.130", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 20 13:49:55 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Jackups:2010:CAS, author = "Ronald {Jackups, Jr.} and Jie Liang", title = "Combinatorial Analysis for Sequence and Spatial Motif Discovery in Short Sequence Fragments", journal = j-TCBB, volume = "7", number = "3", pages = "524--536", month = jul, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.101", ISSN =
"1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 20 13:49:55 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Han:2010:NPC, author = "Xiaoxu Han", title = "Nonnegative Principal Component Analysis for Cancer Molecular Pattern Discovery", journal = j-TCBB, volume = "7", number = "3", pages = "537--549", month = jul, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.36", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 20 13:49:55 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zeng:2010:SSC, author = "Jia Zeng and Xiao-Yu Zhao and Xiao-Qin Cao and Hong Yan", title = "{SCS}: Signal, Context, and Structure Features for Genome-Wide Human Promoter Recognition", journal = j-TCBB, volume = "7", number = "3", pages = "550--562", month = jul, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.95", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 20 13:49:55 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zimek:2010:SHF, author = "Arthur Zimek and Fabian Buchwald and Eibe Frank and Stefan Kramer", title = "A Study of Hierarchical and Flat Classification of Proteins", journal = j-TCBB, volume = "7", number = "3", pages =
"563--571", month = jul, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.104", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 20 13:49:55 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Bonet:2010:CUD, author = "Maria Luisa Bonet and Katherine {St. John}", title = "On the Complexity of {uSPR} Distance", journal = j-TCBB, volume = "7", number = "3", pages = "572--576", month = jul, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.132", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 20 13:49:55 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Mandou:2010:GEI, author = "Ion Mandoiu and Giri Narasimhan and Yi Pan and Yanqing Zhang", title = "{Guest Editors}' Introduction to the Special Section on Bioinformatics Research and Applications", journal = j-TCBB, volume = "7", number = "4", pages = "577--578", month = oct, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.114", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:02 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Munoz:2010:RPG, author = "Adriana Munoz and David Sankoff", title = "Rearrangement Phylogeny of Genomes in
Contig Form", journal = j-TCBB, volume = "7", number = "4", pages = "579--587", month = oct, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.66", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:02 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Venkatachalam:2010:UTC, author = "Balaji Venkatachalam and Jim Apple and Katherine {St. John} and Daniel Gusfield", title = "Untangling Tanglegrams: Comparing Trees by Their Drawings", journal = j-TCBB, volume = "7", number = "4", pages = "588--597", month = oct, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.57", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:02 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Bonizzoni:2010:PPX, author = "Paola Bonizzoni and Gianluca Della Vedova and Riccardo Dondi and Yuri Pirola and Romeo Rizzi", title = "Pure Parsimony Xor Haplotyping", journal = j-TCBB, volume = "7", number = "4", pages = "598--610", month = oct, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.52", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:02 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } 
@Article{Wu:2010:ECC, author = "Yufeng Wu", title = "Exact Computation of Coalescent Likelihood for Panmictic and Subdivided Populations under the Infinite Sites Model", journal = j-TCBB, volume = "7", number = "4", pages = "611--618", month = oct, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.2", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:02 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Rajasekaran:2010:IAP, author = "Sanguthevar Rajasekaran and Sahar {Al Seesi} and Reda A. Ammar", title = "Improved Algorithms for Parsing {ESLTAGs}: a Grammatical Model Suitable for {RNA} Pseudoknots", journal = j-TCBB, volume = "7", number = "4", pages = "619--627", month = oct, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.54", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:02 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Blin:2010:QGP, author = "Guillaume Blin and Florian Sikora and Stephane Vialette", title = "Querying Graphs in Protein-Protein Interactions Networks Using Feedback Vertex Set", journal = j-TCBB, volume = "7", number = "4", pages = "628--635", month = oct, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.53", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:02 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", 
acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Cheng:2010:SLM, author = "Qiang Cheng", title = "A Sparse Learning Machine for High-Dimensional Data with Application to Microarray Gene Analysis", journal = j-TCBB, volume = "7", number = "4", pages = "636--646", month = oct, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.8", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:02 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Langdon:2010:SSD, author = "W. B. Langdon and G. J. G. Upton and R. da Silva Camargo and A. P. Harrison", title = "A Survey of Spatial Defects in {Homo Sapiens Affymetrix GeneChips}", journal = j-TCBB, volume = "7", number = "4", pages = "647--653", month = oct, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.108", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:02 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Li:2010:CRA, author = "Gang Li and Tak-Ming Chan and Kwong-Sak Leung and Kin-Hong Lee", title = "A Cluster Refinement Algorithm for Motif Discovery", journal = j-TCBB, volume = "7", number = "4", pages = "654--668", month = oct, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.25", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 
18:39:02 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhou:2010:FNN, author = "Jianjun Zhou and Jorg Sander and Zhipeng Cai and Lusheng Wang and Guohui Lin", title = "Finding the Nearest Neighbors in Biological Databases Using Less Distance Computations", journal = j-TCBB, volume = "7", number = "4", pages = "669--680", month = oct, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.99", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:02 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Huang:2010:HRR, author = "Liang-Tsung Huang and Lien-Fu Lai and M. 
Michael Gromiha", title = "Human-Readable Rule Generator for Integrating Amino Acid Sequence Information and Stability of Mutant Proteins", journal = j-TCBB, volume = "7", number = "4", pages = "681--687", month = oct, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.128", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:02 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Godin:2010:QDS, author = "Christophe Godin and Pascal Ferraro", title = "Quantifying the Degree of Self-Nestedness of Trees: Application to the Structural Analysis of Plants", journal = j-TCBB, volume = "7", number = "4", pages = "688--703", month = oct, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.29", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:02 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Snir:2010:QMD, author = "Sagi Snir and Satish Rao", title = "Quartets {MaxCut}: a Divide and Conquer Quartets Algorithm", journal = j-TCBB, volume = "7", number = "4", pages = "704--718", month = oct, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.133", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:02 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", 
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Lu:2010:RNM, author = "Xin Lu and Anthony Gamst and Ronghui Xu", title = "{RDCurve}: a Nonparametric Method to Evaluate the Stability of Ranking Procedures", journal = j-TCBB, volume = "7", number = "4", pages = "719--726", month = oct, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.138", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:02 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Tsang:2010:SPA, author = "Herbert H. Tsang and Kay C. Wiese", title = "{SARNA-Predict}: Accuracy Improvement of {RNA} Secondary Structure Prediction Using Permutation-Based Simulated Annealing", journal = j-TCBB, volume = "7", number = "4", pages = "727--740", month = oct, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.97", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:02 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{You:2010:UGP, author = "Liwen You and Vladimir Brusic and Marcus Gallagher and Mikael Boden", title = "Using {Gaussian} Process with Test Rejection to Detect {T}-Cell Epitopes in Pathogen Genomes", journal = j-TCBB, volume = "7", number = "4", pages = "741--751", month = oct, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.131", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:02 MST 2010", bibsource =
"http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Apostolico:2010:VDE, author = "Alberto Apostolico and Matteo Comin and Laxmi Parida", title = "{VARUN}: Discovering Extensible Motifs under Saturation Constraints", journal = j-TCBB, volume = "7", number = "4", pages = "752--762", month = oct, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.123", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:02 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Miklos:2010:MPI, author = "Istvan Miklos and Bence Melykuti and Krister Swenson", title = "The {Metropolized} Partial Importance Sampling {MCMC} Mixes Slowly on Minimum Reversal Rearrangement Paths", journal = j-TCBB, volume = "7", number = "4", pages = "763--767", month = oct, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.26", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:02 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Anonymous:2010:TAI, author = "Anonymous", title = "2010 {TCBB} Annual Index", journal = j-TCBB, volume = "7", number = "4", pages = "763--767", xxnote = "Check pages: they are identical to those of Miklos:2010:MPI??", month = oct, year = "2010", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.116", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L
= "1545-5963", bibdate = "Mon Dec 20 18:39:02 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Sagot:2011:EEa, author = "Marie-France Sagot", title = "{EIC} Editorial", journal = j-TCBB, volume = "8", number = "1", pages = "1--1", month = jan, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.7", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:04 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Owen:2011:FAC, author = "Megan Owen and J. Scott Provan", title = "A Fast Algorithm for Computing Geodesic Distances in Tree Space", journal = j-TCBB, volume = "8", number = "1", pages = "2--13", month = jan, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.3", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:04 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Comparing and computing distances between phylogenetic trees are important biological problems, especially for models where edge lengths play an important role. The geodesic distance measure between two phylogenetic trees with edge lengths is the length of the shortest path between them in the continuous tree space introduced by Billera, Holmes, and Vogtmann. 
This tree space provides a powerful tool for studying and comparing phylogenetic trees, both in exhibiting a natural distance measure and in providing a Euclidean-like structure for solving optimization problems on trees. An important open problem is to find a polynomial time algorithm for finding geodesics in tree space.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Shah:2011:GFA, author = "Mohak Shah and Jacques Corbeil", title = "A General Framework for Analyzing Data from Two Short Time-Series Microarray Experiments", journal = j-TCBB, volume = "8", number = "1", pages = "14--26", month = jan, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.51", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:04 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We propose a general theoretical framework for analyzing differentially expressed genes and behavior patterns from two homogeneous short time-course data. The framework generalizes the recently proposed Hilbert--Schmidt Independence Criterion (HSIC)-based framework adapting it to the time-series scenario by utilizing tensor analysis for data transformation. 
The proposed framework is effective in yielding criteria that can identify both the differentially expressed genes and time-course patterns of interest between two time-series experiments without requiring to explicitly cluster the data.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Mauch:2011:EFE, author = "Sean Mauch and Mark Stalzer", title = "Efficient Formulations for Exact Stochastic Simulation of Chemical Systems", journal = j-TCBB, volume = "8", number = "1", pages = "27--35", month = jan, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.47", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:04 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "One can generate trajectories to simulate a system of chemical reactions using either Gillespie's direct method or Gibson and Bruck's next reaction method. Because one usually needs many trajectories to understand the dynamics of a system, performance is important. In this paper, we present new formulations of these methods that improve the computational complexity of the algorithms. We present optimized implementations, available from \path=http://cain.sourceforge.net/=, that offer better performance than previous work. There is no single method that is best for all problems. Simple formulations often work best for systems with a small number of reactions, while some sophisticated methods offer the best performance for large problems and scale well asymptotically.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Gasbarra:2011:EHF, author = "Dario Gasbarra and Sangita Kulathinal and Matti Pirinen and Mikko J. 
Sillanpaa", title = "Estimating Haplotype Frequencies by Combining Data from Large {DNA} Pools with Database Information", journal = j-TCBB, volume = "8", number = "1", pages = "36--44", month = jan, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.71", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:04 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We assume that allele frequency data have been extracted from several large DNA pools, each containing genetic material of up to hundreds of sampled individuals. Our goal is to estimate the haplotype frequencies among the sampled individuals by combining the pooled allele frequency data with prior knowledge about the set of possible haplotypes. Such prior information can be obtained, for example, from a database such as HapMap. We present a Bayesian haplotyping method for pooled DNA based on a continuous approximation of the multinomial distribution. The proposed method is applicable when the sizes of the DNA pools and/or the number of considered loci exceed the limits of several earlier methods.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Bajaj:2011:FFP, author = "Chandrajit L. Bajaj and Rezaul Chowdhury and Vinay Siddahanavalli", title = "{$ F^2 $Dock}: Fast {Fourier} Protein-Protein Docking", journal = j-TCBB, volume = "8", number = "1", pages = "45--58", month = jan, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.57", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:04 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The functions of proteins are often realized through their mutual interactions. 
Determining a relative transformation for a pair of proteins and their conformations which form a stable complex, reproducible in nature, is known as docking. It is an important step in drug design, structure determination, and understanding function and structure relationships. In this paper, we extend our nonuniform fast Fourier transform-based docking algorithm to include an adaptive search phase (both translational and rotational) and thereby speed up its execution. We have also implemented a multithreaded version of the adaptive docking algorithm for even faster execution on multicore machines. We call this protein-protein docking code $ F^2 $Dock ($ F^2 $ = {\rm \underline{F}ast\underline{F}ourier}).", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Giard:2011:FSB, author = "Joachim Giard and Patrice Rondao Alface and Jean-Luc Gala and Benoit Macq", title = "Fast Surface-Based Travel Depth Estimation Algorithm for Macromolecule Surface Shape Description", journal = j-TCBB, volume = "8", number = "1", pages = "59--68", month = jan, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.53", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:04 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Travel Depth, introduced by Coleman and Sharp in 2006, is a physical interpretation of molecular depth, a term frequently used to describe the shape of a molecular active site or binding site. Travel Depth can be seen as the physical distance a solvent molecule would have to travel from a point of the surface, i.e., the Solvent-Excluded Surface (SES), to its convex hull. 
Existing algorithms providing an estimation of the Travel Depth are based on a regular sampling of the molecule volume and the use of the Dijkstra's shortest path algorithm. Since Travel Depth is only defined on the molecular surface, this volume-based approach is characterized by a large computational complexity due to the processing of unnecessary samples lying inside or outside the molecule.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Pizzi:2011:FSM, author = "Cinzia Pizzi and Pasi Rastas and Esko Ukkonen", title = "Finding Significant Matches of Position Weight Matrices in Linear Time", journal = j-TCBB, volume = "8", number = "1", pages = "69--79", month = jan, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.35", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:04 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Position weight matrices are an important method for modeling signals or motifs in biological sequences, both in DNA and protein contexts. In this paper, we present fast algorithms for the problem of finding significant matches of such matrices. Our algorithms are of the online type, and they generalize classical multipattern matching, filtering, and superalphabet techniques of combinatorial string matching to the problem of weight matrix matching. Several variants of the algorithms are developed, including multiple matrix extensions that perform the search for several matrices in one scan through the sequence database. 
Experimental performance evaluation is provided to compare the new techniques against each other as well as against some other online and index-based algorithms proposed in the literature.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Andonie:2011:FAP, author = "Razvan Andonie and Levente Fabry-Asztalos and Christopher B. Abdul-Wahid and Sarah Abdul-Wahid and Grant I. Barker and Lukas C. Magill", title = "Fuzzy {ARTMAP} Prediction of Biological Activities for Potential {HIV-1} Protease Inhibitors Using a Small Molecular Data Set", journal = j-TCBB, volume = "8", number = "1", pages = "80--93", month = jan, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.50", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:04 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Obtaining satisfactory results with neural networks depends on the availability of large data samples. The use of small training sets generally reduces performance. Most classical Quantitative Structure-Activity Relationship (QSAR) studies for a specific enzyme system have been performed on small data sets. We focus on the neuro-fuzzy prediction of biological activities of HIV-1 protease inhibitory compounds when inferring from small training sets. We propose two computational intelligence prediction techniques which are suitable for small training sets, at the expense of some computational overhead. Both techniques are based on the FAMR model. 
The FAMR is a Fuzzy ARTMAP (FAM) incremental learning system used for classification and probability estimation.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Mitra:2011:GNS, author = "Sushmita Mitra and Ranajit Das and Yoichi Hayashi", title = "Genetic Networks and Soft Computing", journal = j-TCBB, volume = "8", number = "1", pages = "94--107", month = jan, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.39", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:04 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The analysis of gene regulatory networks provides enormous information on various fundamental cellular processes involving growth, development, hormone secretion, and cellular communication. Their extraction from available gene expression profiles is a challenging problem. Such reverse engineering of genetic networks offers insight into cellular activity toward prediction of adverse effects of new drugs or possible identification of new drug targets. Tasks such as classification, clustering, and feature selection enable efficient mining of knowledge about gene interactions in the form of networks. It is known that biological data is prone to different kinds of noise and ambiguity. Soft computing tools, such as fuzzy sets, evolutionary strategies, and neurocomputing, have been found to be helpful in providing low-cost, acceptable solutions in the presence of various types of uncertainties.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wang:2011:IMG, author = "Wenxue Wang and Bijoy K. 
Ghosh and Himadri Pakrasi", title = "Identification and Modeling of Genes with Diurnal Oscillations from Microarray Time Series Data", journal = j-TCBB, volume = "8", number = "1", pages = "108--121", month = jan, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.37", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:04 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Behavior of living organisms is strongly modulated by the day and night cycle giving rise to a cyclic pattern of activities. Such a pattern helps the organisms to coordinate their activities and maintain a balance between what could be performed during the ``day'' and what could be relegated to the ``night.'' This cyclic pattern, called the ``Circadian Rhythm,'' is a biological phenomenon observed in a large number of organisms. In this paper, our goal is to analyze transcriptome data from Cyanothece for the purpose of discovering genes whose expressions are rhythmic. 
We cluster these genes into groups that are close in terms of their phases and show that genes from a specific metabolic functional category are tightly clustered, indicating perhaps a ``preferred time of the day/night'' when the organism performs this function.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Luo:2011:ICE, author = "Lin-Kai Luo and Deng-Feng Huang and Ling-Jun Ye and Qi-Feng Zhou and Gui-Fang Shao and Hong Peng", title = "Improving the Computational Efficiency of Recursive Cluster Elimination for Gene Selection", journal = j-TCBB, volume = "8", number = "1", pages = "122--129", month = jan, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.44", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:04 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The gene expression data are usually provided with a large number of genes and a relatively small number of samples, which brings a lot of new challenges. Selecting those informative genes becomes the main issue in microarray data analysis. Recursive cluster elimination based on support vector machine (SVM-RCE) has shown the better classification accuracy on some microarray data sets than recursive feature elimination based on support vector machine (SVM-RFE). However, SVM-RCE is extremely time-consuming. In this paper, we propose an improved method of SVM-RCE called ISVM-RCE. 
ISVM-RCE first trains a SVM model with all clusters, then applies the infinite norm of weight coefficient vector in each cluster to score the cluster, finally eliminates the gene clusters with the lowest score.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Tan:2011:IPK, author = "Mehmet Tan and Mohammed Alshalalfa and Reda Alhajj and Faruk Polat", title = "Influence of Prior Knowledge in Constraint-Based Learning of Gene Regulatory Networks", journal = j-TCBB, volume = "8", number = "1", pages = "130--142", month = jan, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.58", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:04 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Constraint-based structure learning algorithms generally perform well on sparse graphs. Although sparsity is not uncommon, there are some domains where the underlying graph can have some dense regions; one of these domains is gene regulatory networks, which is the main motivation to undertake the study described in this paper. We propose a new constraint-based algorithm that can both increase the quality of output and decrease the computational requirements for learning the structure of gene regulatory networks. The algorithm is based on and extends the PC algorithm. 
Two different types of information are derived from the prior knowledge; one is the probability of existence of edges, and the other is the nodes that seem to be dependent on a large number of nodes compared to other nodes in the graph.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Gong:2011:ITM, author = "Liuling Gong and Nidhal Bouaynaya and Dan Schonfeld", title = "Information-Theoretic Model of Evolution over Protein Communication Channel", journal = j-TCBB, volume = "8", number = "1", pages = "143--151", month = jan, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.1", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:04 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In this paper, we propose a communication model of evolution and investigate its information-theoretic bounds. The process of evolution is modeled as the retransmission of information over a protein communication channel, where the transmitted message is the organism's proteome encoded in the DNA. We compute the capacity and the rate distortion functions of the protein communication system for the three domains of life: Archaea, Bacteria, and Eukaryotes. The tradeoff between the transmission rate and the distortion in noisy protein communication channels is analyzed. As expected, comparison between the optimal transmission rate and the channel capacity indicates that the biological fidelity does not reach the Shannon optimal distortion.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Barker:2011:LGR, author = "Nathan A. Barker and Chris J. 
Myers and Hiroyuki Kuwahara", title = "Learning Genetic Regulatory Network Connectivity from Time Series Data", journal = j-TCBB, volume = "8", number = "1", pages = "152--165", month = jan, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.48", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:04 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Recent experimental advances facilitate the collection of time series data that indicate which genes in a cell are expressed. This information can be used to understand the genetic regulatory network that generates the data. Typically, Bayesian analysis approaches are applied which neglect the time series nature of the experimental data, have difficulty in determining the direction of causality, and do not perform well on networks with tight feedback. To address these problems, this paper presents a method to learn genetic network connectivity which exploits the time series nature of experimental data to achieve better causal predictions. 
This method first breaks up the data into bins.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ropers:2011:MRU, author = "Delphine Ropers and Valentina Baldazzi and Hidde de Jong", title = "Model Reduction Using Piecewise-Linear Approximations Preserves Dynamic Properties of the Carbon Starvation Response in \bioname{Escherichia coli}", journal = j-TCBB, volume = "8", number = "1", pages = "166--181", month = jan, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.49", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:04 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The adaptation of the bacterium Escherichia coli to carbon starvation is controlled by a large network of biochemical reactions involving genes, mRNAs, proteins, and signalling molecules. The dynamics of these networks is difficult to analyze, notably due to a lack of quantitative information on parameter values. To overcome these limitations, model reduction approaches based on quasi-steady-state (QSS) and piecewise-linear (PL) approximations have been proposed, resulting in models that are easier to handle mathematically and computationally. 
These approximations are not supposed to affect the capability of the model to account for essential dynamical properties of the system, but the validity of this assumption has not been systematically tested.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wu:2011:NMI, author = "Yufeng Wu", title = "New Methods for Inference of Local Tree Topologies with Recombinant {SNP} Sequences in Populations", journal = j-TCBB, volume = "8", number = "1", pages = "182--193", month = jan, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.27", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:04 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Large amount of population-scale genetic variation data are being collected in populations. One potentially important biological problem is to infer the population genealogical history from these genetic variation data. Partly due to recombination, genealogical history of a set of DNA sequences in a population usually cannot be represented by a single tree. Instead, genealogy is better represented by a genealogical network, which is a compact representation of a set of correlated local genealogical trees, each for a short region of genome and possibly with different topology. 
Inference of genealogical history for a set of DNA sequences under recombination has many potential applications, including association mapping of complex diseases. In this paper, we present two new methods for reconstructing local tree topologies with the presence of recombination, which extend and improve the previous work.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Agrawal:2011:PSS, author = "Ankit Agrawal and Xiaoqiu Huang", title = "Pairwise Statistical Significance of Local Sequence Alignment Using Sequence-Specific and Position-Specific Substitution Matrices", journal = j-TCBB, volume = "8", number = "1", pages = "194--205", month = jan, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.69", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:04 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Pairwise sequence alignment is a central problem in bioinformatics, which forms the basis of various other applications. Two related sequences are expected to have a high alignment score, but relatedness is usually judged by statistical significance rather than by alignment score. Recently, it was shown that pairwise statistical significance gives promising results as an alternative to database statistical significance for getting individual significance estimates of pairwise alignment scores. The improvement was mainly attributed to making the statistical significance estimation process more sequence-specific and database-independent. 
In this paper, we use sequence-specific and position-specific substitution matrices to derive the estimates of pairwise statistical significance, which is expected to use more sequence-specific information in estimating pairwise statistical significance.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{vanBerlo:2011:PMF, author = "Rogier J. P. van Berlo and Dick de Ridder and Jean-Marc Daran and Pascale A. S. Daran-Lapujade and Bas Teusink and Marcel J. T. Reinders", title = "Predicting Metabolic Fluxes Using Gene Expression Differences As Constraints", journal = j-TCBB, volume = "8", number = "1", pages = "206--216", month = jan, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.55", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:04 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A standard approach to estimate intracellular fluxes on a genome-wide scale is flux-balance analysis (FBA), which optimizes an objective function subject to constraints on (relations between) fluxes. The performance of FBA models heavily depends on the relevance of the formulated objective function and the completeness of the defined constraints. Previous studies indicated that FBA predictions can be improved by adding regulatory on/off constraints. These constraints were imposed based on either absolute or relative gene expression values. We provide a new algorithm that directly uses regulatory up/down constraints based on gene expression data in FBA optimization (tFBA). 
Our assumption is that if the activity of a gene drastically changes from one condition to the other, the flux through the reaction controlled by that gene will change accordingly.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Lahti:2011:PAP, author = "Leo Lahti and Laura L. Elo and Tero Aittokallio and Samuel Kaski", title = "Probabilistic Analysis of Probe Reliability in Differential Gene Expression Studies with Short Oligonucleotide Arrays", journal = j-TCBB, volume = "8", number = "1", pages = "217--225", month = jan, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.38", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:04 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Probe defects are a major source of noise in gene expression studies. While existing approaches detect noisy probes based on external information such as genomic alignments, we introduce and validate a targeted probabilistic method for analyzing probe reliability directly from expression data and independently of the noise source. This provides insights into the various sources of probe-level noise and gives tools to guide probe design.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Kc:2011:TIP, author = "Dukka B. Kc and Dennis R. 
Livesay", title = "Topology Improves Phylogenetic Motif Functional Site Predictions", journal = j-TCBB, volume = "8", number = "1", pages = "226--233", month = jan, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.60", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:04 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Prediction of protein functional sites from sequence-derived data remains an open bioinformatics problem. We have developed a phylogenetic motif (PM) functional site prediction approach that identifies functional sites from alignment fragments that parallel the evolutionary patterns of the family. In our approach, PMs are identified by comparing tree topologies of each alignment fragment to that of the complete phylogeny. Herein, we bypass the phylogenetic reconstruction step and identify PMs directly from distance matrix comparisons. In order to optimize the new algorithm, we consider three different distance matrices and 13 different matrix similarity scores. 
We assess the performance of the various approaches on a structurally nonredundant data set that includes three types of functional site definitions.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Hoque:2011:TRG, author = "Md Tamjidul Hoque and Madhu Chetty and Andrew Lewis and Abdul Sattar", title = "Twin Removal in Genetic Algorithms for Protein Structure Prediction Using Low-Resolution Model", journal = j-TCBB, volume = "8", number = "1", pages = "234--245", month = jan, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.34", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:04 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This paper presents the impact of twins and the measures for their removal from the population of genetic algorithm (GA) when applied to effective conformational searching. It is conclusively shown that a twin removal strategy for a GA provides considerably enhanced performance when investigating solutions to complex ab initio protein structure prediction (PSP) problems in low-resolution model. Without twin removal, GA crossover and mutation operations can become ineffectual as generations lose their ability to produce significant differences, which can lead to the solution stalling.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{daCosta:2011:WPC, author = "Joaquim F. 
Pinto da Costa and Hugo Alonso and Luis Roque", title = "A Weighted Principal Component Analysis and Its Application to Gene Expression Data", journal = j-TCBB, volume = "8", number = "1", pages = "246--252", month = jan, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.61", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:04 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In this work, we introduce in the first part new developments in Principal Component Analysis (PCA) and in the second part a new method to select variables (genes in our application). Our focus is on problems where the values taken by each variable do not all have the same importance and where the data may be contaminated with noise and contain outliers, as is the case with microarray data. The usual PCA is not appropriate to deal with this kind of problems. In this context, we propose the use of a new correlation coefficient as an alternative to Pearson's. This leads to a so-called weighted PCA (WPCA).", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Li:2011:DAN, author = "Ping Li and James Lam", title = "Disturbance Analysis of Nonlinear Differential Equation Models of Genetic {SUM} Regulatory Networks", journal = j-TCBB, volume = "8", number = "1", pages = "253--259", month = jan, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.19", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:04 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Noise disturbances and time delays are frequently met in cellular genetic regulatory systems. 
This paper is concerned with the disturbance analysis of a class of genetic regulatory networks described by nonlinear differential equation models. The mechanisms of genetic regulatory networks to amplify (attenuate) external disturbance are explored, and a simple measure of the amplification (attenuation) level is developed from a nonlinear robust control point of view. It should be noted that the conditions used to measure the disturbance level are delay-independent or delay-dependent, and are expressed within the framework of linear matrix inequalities, which can be characterized as convex optimization, and computed by the interior-point algorithm easily.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Luo:2011:LTA, author = "Cheng-Wei Luo and Ming-Chiang Chen and Yi-Ching Chen and Roger W. L. Yang and Hsiao-Fei Liu and Kun-Mao Chao", title = "Linear-Time Algorithms for the Multiple Gene Duplication Problems", journal = j-TCBB, volume = "8", number = "1", pages = "260--265", month = jan, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.52", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:04 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A fundamental problem arising in the evolutionary molecular biology is to discover the locations of gene duplications and multiple gene duplication episodes based on the phylogenetic information. The solutions to the MULTIPLE GENE DUPLICATION problems can provide useful clues to place the gene duplication events onto the locations of a species tree and to expose the multiple gene duplication episodes. In this paper, we study two variations of the MULTIPLE GENE DUPLICATION problems: the EPISODE-CLUSTERING (EC) problem and the MINIMUM EPISODES (ME) problem. 
For the EC problem, we improve the results of Burleigh et al. with an optimal linear-time algorithm.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Mao:2011:RMS, author = "Kezhi Z. Mao and Wenyin Tang", title = "Recursive {Mahalanobis} Separability Measure for Gene Subset Selection", journal = j-TCBB, volume = "8", number = "1", pages = "266--272", month = jan, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.43", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:04 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Mahalanobis class separability measure provides an effective evaluation of the discriminative power of a feature subset, and is widely used in feature selection. However, this measure is computationally intensive or even prohibitive when it is applied to gene expression data. In this study, a recursive approach to Mahalanobis measure evaluation is proposed, with the goal of reducing computational overhead. Instead of evaluating Mahalanobis measure directly in high-dimensional space, the recursive approach evaluates the measure through successive evaluations in 2D space. Because of its recursive nature, this approach is extremely efficient when it is combined with a forward search procedure. 
In addition, it is noted that gene subsets selected by Mahalanobis measure tend to overfit training data and generalize unsatisfactorily on unseen test data, due to small sample size in gene expression problems.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Krishnamurthy:2011:SMM, author = "Vikram Krishnamurthy and Kai-Yiu Luk", title = "Semi-{Markov} Models for {Brownian} Dynamics Permeation in Biological Ion Channels", journal = j-TCBB, volume = "8", number = "1", pages = "273--281", month = jan, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.136", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:04 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Constructing accurate computational models that explain how ions permeate through a biological ion channel is an important problem in biophysics and drug design. Brownian dynamics simulations are large-scale interacting particle computer simulations for modeling ion channel permeation but can be computationally prohibitive. In this paper, we show the somewhat surprising result that a small-dimensional semi-Markov model can generate events (such as conduction events and dwell times at binding sites in the protein) that are statistically indistinguishable from Brownian dynamics computer simulation. 
This approach enables the use of extrapolation techniques to predict channel conduction when performing the actual Brownian dynamics simulation that is computationally intractable.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Krishnamurthy:2011:TRL, author = "Anonymous", title = "2010 {TCBB} Reviewers List", journal = j-TCBB, volume = "8", number = "1", pages = "282--284", month = jan, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.1", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Dec 20 18:39:04 MST 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Sagot:2011:EEb, author = "Marie-France Sagot", title = "{EIC} Editorial", journal = j-TCBB, volume = "8", number = "2", pages = "289--291", month = mar, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.15", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jan 26 14:16:19 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wu:2011:GES, author = "Fang-Xiang Wu and Jun Huan", title = "Guest Editorial: Special Focus on Bioinformatics and Systems Biology", journal = j-TCBB, volume = "8", number = "2", pages = "292--293", month = mar, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.16", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", 
bibdate = "Wed Jan 26 14:16:19 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Li:2011:FSF, author = "Yanpeng Li and Xiaohua Hu and Hongfei Lin and Zhihao Yang", title = "A Framework for Semisupervised Feature Generation and Its Applications in Biomedical Literature Mining", journal = j-TCBB, volume = "8", number = "2", pages = "294--307", month = mar, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.99", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jan 26 14:16:19 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Feature representation is essential to machine learning and text mining. In this paper, we present a feature coupling generalization (FCG) framework for generating new features from unlabeled data. It selects two special types of features, i.e., example-distinguishing features (EDFs) and class-distinguishing features (CDFs) from original feature set, and then generalizes EDFs into higher-level features based on their coupling degrees with CDFs in unlabeled data.
The advantage is: EDFs with extreme sparsity in labeled data can be enriched by their co-occurrences with CDFs in unlabeled data so that the performance of these low-frequency features can be greatly boosted and new information from unlabeled can be incorporated.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Jeong:2011:PSS, author = "Jong Cheol Jeong and Xiaotong Lin and Xue-wen Chen", title = "On Position-Specific Scoring Matrix for Protein Function Prediction", journal = j-TCBB, volume = "8", number = "2", pages = "308--315", month = mar, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.93", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jan 26 14:16:19 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "While genome sequencing projects have generated tremendous amounts of protein sequence data for a vast number of genomes, substantial portions of most genomes are still unannotated. Despite the success of experimental methods for identifying protein functions, they are often lab intensive and time consuming. Thus, it is only practical to use in silico methods for the genome-wide functional annotations. In this paper, we propose new features extracted from protein sequence only and machine learning-based methods for computational function prediction. These features are derived from a position-specific scoring matrix, which has shown great potential in other bioinformatics problems.
We evaluate these features using four different classifiers and yeast protein data.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Oh:2011:ELA, author = "Sangyoon Oh and Min Su Lee and Byoung-Tak Zhang", title = "Ensemble Learning with Active Example Selection for Imbalanced Biomedical Data Classification", journal = j-TCBB, volume = "8", number = "2", pages = "316--325", month = mar, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.96", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jan 26 14:16:19 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In biomedical data, the imbalanced data problem occurs frequently and causes poor prediction performance for minority classes. It is because the trained classifiers are mostly derived from the majority class. In this paper, we describe an ensemble learning method combined with active example selection to resolve the imbalanced data problem. Our method consists of three key components: (1) an active example selection algorithm to choose informative examples for training the classifier, (2) an ensemble learning method to combine variations of classifiers derived by active example selection, and (3) an incremental learning scheme to speed up the iterative training procedure for active example selection.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ibrahim:2011:UQP, author = "Zina Ibrahim and Alioune Ngom and Ahmed Y. 
Tawfik", title = "Using Qualitative Probability in Reverse-Engineering Gene Regulatory Networks", journal = j-TCBB, volume = "8", number = "2", pages = "326--334", month = mar, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.98", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jan 26 14:16:19 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This paper demonstrates the use of qualitative probabilistic networks (QPNs) to aid Dynamic Bayesian Networks (DBNs) in the process of learning the structure of gene regulatory networks from microarray gene expression data. We present a study which shows that QPNs define monotonic relations that are capable of identifying regulatory interactions in a manner that is less susceptible to the many sources of uncertainty that surround gene expression data. Moreover, we construct a model that maps the regulatory interactions of genetic networks to QPN constructs and show its capability in providing a set of candidate regulators for target genes, which is subsequently used to establish a prior structure that the DBN learning algorithm can use and which (1) distinguishes spurious correlations from true regulations, (2) enables the discovery of sets of coregulators of target genes, and (3) results in a more efficient construction of gene regulatory networks.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Sabnis:2011:CTD, author = "Amit Sabnis and Robert W. 
Harrison", title = "A Continuous-Time, Discrete-State Method for Simulating the Dynamics of Biochemical Systems", journal = j-TCBB, volume = "8", number = "2", pages = "335--341", month = mar, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.97", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jan 26 14:16:19 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Computational systems biology is largely driven by mathematical modeling and simulation of biochemical networks, via continuous deterministic methods or discrete event stochastic methods. Although the deterministic methods are efficient in predicting the macroscopic behavior of a biochemical system, they are severely limited by their inability to represent the stochastic effects of random molecular fluctuations at lower concentration. In this work, we have presented a novel method for simulating biochemical networks based on a deterministic solution with a modification that permits the incorporation of stochastic effects. To demonstrate the feasibility of our approach, we have tested our method on three previously reported biochemical networks.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Raiford:2011:GOA, author = "Douglas W. Raiford and Dan E. Krane and Travis E. Doom and Michael L. 
Raymer", title = "A Genetic Optimization Approach for Isolating Translational Efficiency Bias", journal = j-TCBB, volume = "8", number = "2", pages = "342--352", month = mar, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.24", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jan 26 14:16:19 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The study of codon usage bias is an important research area that contributes to our understanding of molecular evolution, phylogenetic relationships, respiratory lifestyle, and other characteristics. Translational efficiency bias is perhaps the most well-studied codon usage bias, as it is frequently utilized to predict relative protein expression levels. We present a novel approach to isolating translational efficiency bias in microbial genomes. There are several existent methods for isolating translational efficiency bias. Previous approaches are susceptible to the confounding influences of other potentially dominant biases. 
Additionally, existing approaches to identifying translational efficiency bias generally require both genomic sequence information and prior knowledge of a set of highly expressed genes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ram:2011:MBB, author = "Ramesh Ram and Madhu Chetty", title = "A {Markov-Blanket}-Based Model for Gene Regulatory Network Inference", journal = j-TCBB, volume = "8", number = "2", pages = "353--367", month = mar, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.70", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jan 26 14:16:19 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "An efficient two-step Markov blanket method for modeling and inferring complex regulatory networks from large-scale microarray data sets is presented. The inferred gene regulatory network (GRN) is based on the time series gene expression data capturing the underlying gene interactions. For constructing a highly accurate GRN, the proposed method performs: (1) discovery of a gene's Markov Blanket (MB), (2) formulation of a flexible measure to determine the network's quality, (3) efficient searching with the aid of a guided genetic algorithm, and (4) pruning to obtain a minimal set of correct interactions. 
Investigations are carried out using both synthetic as well as yeast cell cycle gene expression data sets.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{He:2011:PSC, author = "Zengyou He and Can Yang and Weichuan Yu", title = "A Partial Set Covering Model for Protein Mixture Identification Using Mass Spectrometry Data", journal = j-TCBB, volume = "8", number = "2", pages = "368--380", month = mar, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.54", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jan 26 14:16:19 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Protein identification is a key and essential step in mass spectrometry (MS) based proteome research. To date, there are many protein identification strategies that employ either MS data or MS/MS data for database searching. While MS-based methods provide wider coverage than MS/MS-based methods, their identification accuracy is lower since MS data have less information than MS/MS data. Thus, it is desired to design more sophisticated algorithms that achieve higher identification accuracy using MS data. Peptide Mass Fingerprinting (PMF) has been widely used to identify single purified proteins from MS data for many years. In this paper, we extend this technology to protein mixture identification.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wu:2011:ACC, author = "Yonghui Wu and Timothy J. 
Close and Stefano Lonardi", title = "Accurate Construction of Consensus Genetic Maps via Integer Linear Programming", journal = j-TCBB, volume = "8", number = "2", pages = "381--394", month = mar, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.35", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jan 26 14:16:19 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We study the problem of merging genetic maps, when the individual genetic maps are given as directed acyclic graphs. The computational problem is to build a consensus map, which is a directed graph that includes and is consistent with all (or, the vast majority of) the markers in the input maps. However, when markers in the individual maps have ordering conflicts, the resulting consensus map will contain cycles. Here, we formulate the problem of resolving cycles in the context of a parsimonious paradigm that takes into account two types of errors that may be present in the input maps, namely, local reshuffles and global displacements.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Aydin:2011:BMA, author = "Zafer Aydin and Yucel Altunbasak and Hakan Erdogan", title = "{Bayesian} Models and Algorithms for Protein $ \beta $-Sheet Prediction", journal = j-TCBB, volume = "8", number = "2", pages = "395--409", month = mar, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2008.140", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jan 26 14:16:19 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Prediction of the 3D structure greatly benefits from the information related to secondary structure, solvent accessibility, and nonlocal contacts 
that stabilize a protein's structure. We address the problem of $ \beta $-sheet prediction defined as the prediction of $ \beta $-strand pairings, interaction types (parallel or antiparallel), and $ \beta $-residue interactions (or contact maps). We introduce a Bayesian approach for proteins with six or less $ \beta $-strands in which we model the conformational features in a probabilistic framework by combining the amino acid pairing potentials with a priori knowledge of $ \beta $-strand organizations. To select the optimum $ \beta $-sheet architecture, we significantly reduce the search space by heuristics that enforce the amino acid pairs with strong interaction potentials.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Cardona:2011:CGT, author = "Gabriel Cardona and Merce Llabres and Francesc Rossello and Gabriel Valiente", title = "Comparison of Galled Trees", journal = j-TCBB, volume = "8", number = "2", pages = "410--427", month = mar, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.60", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jan 26 14:16:19 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Galled trees, directed acyclic graphs that model evolutionary histories with isolated hybridization events, have become very popular due to both their biological significance and the existence of polynomial-time algorithms for their reconstruction.
In this paper, we establish to which extent several distance measures for the comparison of evolutionary networks are metrics for galled trees, and hence, when they can be safely used to evaluate galled tree reconstruction methods.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Leung:2011:DMD, author = "KwongSak Leung and KinHong Lee and JinFeng Wang and Eddie YT Ng and Henry LY Chan and Stephen KW Tsui and Tony SK Mok and Pete Chi-Hang Tse and Joseph JY Sung", title = "Data Mining on {DNA} Sequences of {Hepatitis B} Virus", journal = j-TCBB, volume = "8", number = "2", pages = "428--440", month = mar, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.6", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jan 26 14:16:19 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Extraction of meaningful information from large experimental data sets is a key element in bioinformatics research. One of the challenges is to identify genomic markers in Hepatitis B Virus (HBV) that are associated with HCC (liver cancer) development by comparing the complete genomic sequences of HBV among patients with HCC and those without HCC. In this study, a data mining framework, which includes molecular evolution analysis, clustering, feature selection, classifier learning, and classification, is introduced. Our research group has collected HBV DNA sequences, either genotype B or C, from over 200 patients specifically for this project. 
In the molecular evolution analysis and clustering, three subgroups have been identified in genotype C and a clustering method has been developed to separate the subgroups.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Lin:2011:DMF, author = "Tien-ho Lin and Robert F. Murphy and Ziv Bar-Joseph", title = "Discriminative Motif Finding for Predicting Protein Subcellular Localization", journal = j-TCBB, volume = "8", number = "2", pages = "441--451", month = mar, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.82", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jan 26 14:16:19 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Many methods have been described to predict the subcellular location of proteins from sequence information. However, most of these methods either rely on global sequence properties or use a set of known protein targeting motifs to predict protein localization. Here, we develop and test a novel method that identifies potential targeting motifs using a discriminative approach based on hidden Markov models (discriminative HMMs). These models search for motifs that are present in a compartment but absent in other, nearby, compartments by utilizing an hierarchical structure that mimics the protein sorting mechanism. 
We show that both discriminative motif finding and the hierarchical structure improve localization prediction on a benchmark data set of yeast proteins.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Saraswathi:2011:IPE, author = "Saras Saraswathi and Suresh Sundaram and Narasimhan Sundararajan and Michael Zimmermann and Marit Nilsen-Hamilton", title = "{ICGA-PSO-ELM} Approach for Accurate Multiclass Cancer Classification Resulting in Reduced Gene Sets in Which Genes Encoding Secreted Proteins Are Highly Represented", journal = j-TCBB, volume = "8", number = "2", pages = "452--463", month = mar, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.13", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jan 26 14:16:19 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A combination of Integer-Coded Genetic Algorithm (ICGA) and Particle Swarm Optimization (PSO), coupled with the neural-network-based Extreme Learning Machine (ELM), is used for gene selection and cancer classification. ICGA is used with PSO\_ELM to select an optimal set of genes, which is then used to build a classifier to develop an algorithm (ICGA\_PSO\_ELM) that can handle sparse data and sample imbalance. We evaluate the performance of ICGA\_PSO\_ELM and compare our results with existing methods in the literature. 
An investigation into the functions of the selected genes, using a systems biology approach, revealed that many of the identified genes are involved in cell signaling and proliferation.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Irigoien:2011:MTC, author = "Itziar Irigoien and Sergi Vives and Concepcion Arenas", title = "Microarray Time Course Experiments: Finding Profiles", journal = j-TCBB, volume = "8", number = "2", pages = "464--475", month = mar, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.79", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jan 26 14:16:19 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Time course studies with microarray techniques and experimental replicates are very useful in biomedical research. We present, in replicate experiments, an alternative approach to select and cluster genes according to a new measure for association between genes. First, the procedure normalizes and standardizes the expression profile of each gene, and then, identifies scaling parameters that will further minimize the distance between replicates of the same gene. Then, the procedure filters out genes with a flat profile, detects differences between replicates, and separates genes without significant differences from the rest. 
For this last group of genes, we define a mean profile for each gene and use it to compute the distance between two genes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Dong:2011:NNK, author = "Qiwen Dong and Shuigeng Zhou", title = "Novel Nonlinear Knowledge-Based Mean Force Potentials Based on Machine Learning", journal = j-TCBB, volume = "8", number = "2", pages = "476--486", month = mar, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.86", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jan 26 14:16:19 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The prediction of 3D structures of proteins from amino acid sequences is one of the most challenging problems in molecular biology. An essential task for solving this problem with coarse-grained models is to deduce effective interaction potentials. The development and evaluation of new energy functions is critical to accurately modeling the properties of biological macromolecules. Knowledge-based mean force potentials are derived from statistical analysis of proteins of known structures. Current knowledge-based potentials are almost in the form of weighted linear sum of interaction pairs. In this study, a class of novel nonlinear knowledge-based mean force potentials is presented. 
The potential parameters are obtained by nonlinear classifiers, instead of relative frequencies of interaction pairs against a reference state or linear classifiers.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Loriot:2011:CSD, author = "Sebastien Loriot and Sushant Sachdeva and Karine Bastard and Chantal Prevost and Frederic Cazals", title = "On the Characterization and Selection of Diverse Conformational Ensembles with Applications to Flexible Docking", journal = j-TCBB, volume = "8", number = "2", pages = "487--498", month = mar, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.59", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jan 26 14:16:19 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "To address challenging flexible docking problems, a number of docking algorithms pregenerate large collections of candidate conformers. To remove the redundancy from such ensembles, a central problem in this context is to report a selection of conformers maximizing some geometric diversity criterion. We make three contributions to this problem. First, we resort to geometric optimization so as to report selections maximizing the molecular volume or molecular surface area (MSA) of the selection. Greedy strategies are developed, together with approximation bounds. 
Second, to assess the efficacy of our algorithms, we investigate two conformer ensembles corresponding to a flexible loop of four protein complexes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Giegerich:2011:SAS, author = "Robert Giegerich and Christian Hoener zu Siederdissen", title = "Semantics and Ambiguity of Stochastic {RNA} Family Models", journal = j-TCBB, volume = "8", number = "2", pages = "499--516", month = mar, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.12", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jan 26 14:16:19 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Stochastic models, such as hidden Markov models or stochastic context-free grammars (SCFGs) can fail to return the correct, maximum likelihood solution in the case of semantic ambiguity. This problem arises when the algorithm implementing the model inspects the same solution in different guises. It is a difficult problem in the sense that proving semantic nonambiguity has been shown to be algorithmically undecidable, while compensating for it (by coalescing scores of equivalent solutions) has been shown to be NP-hard. For stochastic context-free grammars modeling RNA secondary structure, it has been shown that the distortion of results can be quite severe. 
Much less is known about the case when stochastic context-free grammars model the matching of a query sequence to an implicit consensus structure for an RNA family.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Tofigh:2011:SID, author = "Ali Tofigh and Michael Hallett and Jens Lagergren", title = "Simultaneous Identification of Duplications and Lateral Gene Transfers", journal = j-TCBB, volume = "8", number = "2", pages = "517--535", month = mar, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.14", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jan 26 14:16:19 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The incongruency between a gene tree and a corresponding species tree can be attributed to evolutionary events such as gene duplication and gene loss. This paper describes a combinatorial model where so-called DTL-scenarios are used to explain the differences between a gene tree and a corresponding species tree taking into account gene duplications, gene losses, and lateral gene transfers (also known as horizontal gene transfers). The reasonable biological constraint that a lateral gene transfer may only occur between contemporary species leads to the notion of acyclic DTL-scenarios. Parsimony methods are introduced by defining appropriate optimization problems. We show that finding most parsimonious acyclic DTL-scenarios is NP-hard.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Lieberman:2011:VEA, author = "Michael D. 
Lieberman and Sima Taheri and Huimin Guo and Fatemeh Mirrashed and Inbal Yahav and Aleks Aris and Ben Shneiderman", title = "Visual Exploration across Biomedical Databases", journal = j-TCBB, volume = "8", number = "2", pages = "536--550", month = mar, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.1", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jan 26 14:16:19 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Though biomedical research often draws on knowledge from a wide variety of fields, few visualization methods for biomedical data incorporate meaningful cross-database exploration. A new approach is offered for visualizing and exploring a query-based subset of multiple heterogeneous biomedical databases. Databases are modeled as an entity-relation graph containing nodes (database records) and links (relationships between records). Users specify a keyword search string to retrieve an initial set of nodes, and then explore intra- and interdatabase links.
Results are visualized with user-defined semantic substrates to take advantage of the rich set of attributes usually present in biomedical data.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Hickey:2011:AAN, author = "Glenn Hickey and Mathieu Blanchette and Paz Carmi and Anil Maheshwari and Norbert Zeh", title = "An Approximation Algorithm for the {Noah's Ark Problem} with Random Feature Loss", journal = j-TCBB, volume = "8", number = "2", pages = "551--556", month = mar, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.37", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jan 26 14:16:19 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The phylogenetic diversity (PD) of a set of species is a measure of their evolutionary distinctness based on a phylogenetic tree. PD is increasingly being adopted as an index of biodiversity in ecological conservation projects. The Noah's Ark Problem (NAP) is an NP-Hard optimization problem that abstracts a fundamental conservation challenge in asking to maximize the expected PD of a set of taxa given a fixed budget, where each taxon is associated with a cost of conservation and a probability of extinction. 
Only simplified instances of the problem, where one or more parameters are fixed as constants, have as of yet been addressed in the literature.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Cortes:2011:EMM, author = "Juan Cortes and Sophie Barbe and Monique Erard and Thierry Simeon", title = "Encoding Molecular Motions in Voxel Maps", journal = j-TCBB, volume = "8", number = "2", pages = "557--563", month = mar, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.23", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jan 26 14:16:19 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This paper builds on the combination of robotic path planning algorithms and molecular modeling methods for computing large-amplitude molecular motions, and introduces voxel maps as a computational tool to encode and to represent such motions. We investigate several applications and show results that illustrate the interest of such representation.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Rocha:2011:GCL, author = "J. 
Rocha",
  title = "Graph Comparison by Log-Odds Score Matrices with Application to Protein Topology Analysis",
  journal = j-TCBB,
  volume = "8",
  number = "2",
  pages = "564--569",
  month = mar,
  year = "2011",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2010.59",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Wed Jan 26 14:16:19 MST 2011",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract = "A TOPS diagram is a simplified description of the topology of a protein using a graph where nodes are $ \alpha $-helices and $ \beta $-strands, and edges correspond to chirality relations and parallel or antiparallel bonds between strands. We present a matching algorithm between two TOPS diagrams where the likelihood of a match is measured according to previously known matches between complete 3D structures. This totally new 3D training is recorded on transition matrices that count the likelihood that a given TOPS feature, or combination thereof, is replaced by another feature on homologs. The new algorithm outperforms existing ones on a benchmark database.
Some biologically significant examples are discussed as well.",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

%%% Accented letters in the author names below are entered as braced
%%% TeX accent commands so that BibTeX sorts and labels them correctly.
@Article{Fujita:2011:ICR,
  author = "Andr{\'e} Fujita and Jo{\~a}o Ricardo Sato and Marcos Almeida Demasi and Rui Yamaguchi and Teppei Shimamura and Carlos Eduardo Ferreira and Mari Cleide Sogayar and Satoru Miyano",
  title = "Inferring Contagion in Regulatory Networks",
  journal = j-TCBB,
  volume = "8",
  number = "2",
  pages = "570--576",
  month = mar,
  year = "2011",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2010.40",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Wed Jan 26 14:16:19 MST 2011",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract = "Several gene regulatory network models containing concepts of directionality at the edges have been proposed. However, only a few reports have an interpretable definition of directionality. Here, differently from the standard causality concept defined by Pearl, we introduce the concept of contagion in order to infer directionality at the edges, i.e., asymmetries in gene expression dependences of regulatory networks. Moreover, we present a bootstrap algorithm in order to test the contagion concept.
This technique was applied in simulated data and, also, in an actual large sample of biological data.",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Benso:2011:CMG,
  author = "Alfredo Benso and Stefano {Di Carlo} and Gianfranco Politano",
  title = "A {cDNA} Microarray Gene Expression Data Classifier for Clinical Diagnostics Based on Graph Theory",
  journal = j-TCBB,
  volume = "8",
  number = "3",
  pages = "577--591",
  month = may # "\slash " # jun,
  year = "2011",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2010.90",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Wed May 25 15:41:56 2011",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

%%% The first author's surname is entered with braced TeX umlaut
%%% commands so that BibTeX sorts and labels it correctly.
@Article{Yoruk:2011:CSM,
  author = "Erdem Y{\"o}r{\"u}k and Michael F.
Ochs and Donald Geman and Laurent Younes",
  title = "A Comprehensive Statistical Model for Cell Signaling",
  journal = j-TCBB,
  volume = "8",
  number = "3",
  pages = "592--606",
  month = may # "\slash " # jun,
  year = "2011",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2010.87",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Wed May 25 15:41:56 2011",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Wang:2011:FHC,
  author = "Jianxin Wang and Min Li and Jianer Chen and Yi Pan",
  title = "A Fast Hierarchical Clustering Algorithm for Functional Modules Discovery in Protein Interaction Networks",
  journal = j-TCBB,
  volume = "8",
  number = "3",
  pages = "607--620",
  month = may # "\slash " # jun,
  year = "2011",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2010.75",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Wed May 25 15:41:56 2011",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Feng:2011:MFB,
  author = "Jianxing Feng and Rui Jiang and Tao Jiang",
  title = "A Max-Flow-Based Approach to the Identification of Protein Complexes Using Protein Interaction and Microarray Data",
  journal = j-TCBB,
  volume = "8",
  number = "3",
  pages = "621--634",
  month = may # "\slash " # jun,
  year = "2011",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2010.78",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Wed May 25 15:41:56 2011",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and
Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Huber:2011:PAR,
  author = "Katharina T. Huber and Leo van Iersel and Steven Kelk and Rados{\l}aw Suchecki",
  title = "A Practical Algorithm for Reconstructing Level-1 Phylogenetic Networks",
  journal = j-TCBB,
  volume = "8",
  number = "3",
  pages = "635--649",
  month = may # "\slash " # jun,
  year = "2011",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2010.17",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Wed May 25 15:41:56 2011",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Murphy:2011:TAP,
  author = "James T. Murphy and Ray Walshe and Marc Devocelle",
  title = "A Theoretical Analysis of the {Prodrug} Delivery System for Treating Antibiotic-Resistant Bacteria",
  journal = j-TCBB,
  volume = "8",
  number = "3",
  pages = "650--658",
  month = may # "\slash " # jun,
  year = "2011",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2010.58",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Wed May 25 15:41:56 2011",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Ghorai:2011:CCG,
  author = "Santanu Ghorai and Anirban Mukherjee and Sanghamitra Sengupta and Pranab K.
Dutta",
  title = "Cancer Classification from Gene Expression Data by {NPPC} Ensemble",
  journal = j-TCBB,
  volume = "8",
  number = "3",
  pages = "659--671",
  month = may # "\slash " # jun,
  year = "2011",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2010.36",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Wed May 25 15:41:56 2011",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

%%% The author's surname is entered with a braced TeX umlaut command
%%% so that BibTeX sorts and labels it correctly.
@Article{Gossler:2011:CBM,
  author = "Gregor G{\"o}ssler",
  title = "Component-Based Modeling and Reachability Analysis of Genetic Networks",
  journal = j-TCBB,
  volume = "8",
  number = "3",
  pages = "672--682",
  month = may # "\slash " # jun,
  year = "2011",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2010.81",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Wed May 25 15:41:56 2011",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Tamada:2011:EGW,
  author = "Yoshinori Tamada and Seiya Imoto and Hiromitsu Araki and Masao Nagasaki and Cristin Print and D.
Stephen Charnock-Jones and Satoru Miyano",
  title = "Estimating Genome-Wide Gene Networks Using Nonparametric {Bayesian} Network Models on Massively Parallel Computers",
  journal = j-TCBB,
  volume = "8",
  number = "3",
  pages = "683--697",
  month = may # "\slash " # jun,
  year = "2011",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2010.68",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Wed May 25 15:41:56 2011",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Hudek:2011:FSL,
  author = "Alexander K. Hudek and Daniel G. Brown",
  title = "{FEAST}: Sensitive Local Alignment with Multiple Rates of Evolution",
  journal = j-TCBB,
  volume = "8",
  number = "3",
  pages = "698--709",
  month = may # "\slash " # jun,
  year = "2011",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2010.76",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Wed May 25 15:41:56 2011",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Allman:2011:ITT,
  author = "Elizabeth S. Allman and Sonja Petrovi{\'c} and John A.
Rhodes and Seth Sullivant",
  title = "Identifiability of Two-Tree Mixtures for Group-Based Models",
  journal = j-TCBB,
  volume = "8",
  number = "3",
  pages = "710--722",
  month = may # "\slash " # jun,
  year = "2011",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2010.79",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Wed May 25 15:41:56 2011",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Yu:2011:INR,
  author = "Tianwei Yu and Hesen Peng and Wei Sun",
  title = "Incorporating Nonlinear Relationships in Microarray Missing Value Imputation",
  journal = j-TCBB,
  volume = "8",
  number = "3",
  pages = "723--731",
  month = may # "\slash " # jun,
  year = "2011",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2010.73",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Wed May 25 15:41:56 2011",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

%%% The second author's leading initial (dotted capital I) carries its
%%% period, and the surname ends in a dotless i followed by n.
%%% NOTE(review): confirm the spelling against the publisher byline.
@Article{Song:2011:MSS,
  author = "Bin Song and {\.I}. Esra B{\"u}y{\"u}ktahtak{\i}n and Sanjay Ranka and Tamer Kahveci",
  title = "Manipulating the Steady State of Metabolic Pathways",
  journal = j-TCBB,
  volume = "8",
  number = "3",
  pages = "732--747",
  month = may # "\slash " # jun,
  year = "2011",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2010.41",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Wed May 25 15:41:56 2011",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =
"http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Xu:2011:MLP,
  author = "Qian Xu and Sinno Jialin Pan and Hannah Hong Xue and Qiang Yang",
  title = "Multitask Learning for Protein Subcellular Location Prediction",
  journal = j-TCBB,
  volume = "8",
  number = "3",
  pages = "748--759",
  month = may # "\slash " # jun,
  year = "2011",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2010.22",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Wed May 25 15:41:56 2011",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

%%% Accented letters in the author names below are entered as braced
%%% TeX accent commands so that BibTeX sorts and labels them correctly.
@Article{Armananzas:2011:PSM,
  author = "Rub{\'e}n Arma{\~n}anzas and Yvan Saeys and I{\~n}aki Inza and Miguel Garc{\'\i}a-Torres and Concha Bielza and Yves van de Peer and Pedro Larra{\~n}aga",
  title = "Peakbin Selection in Mass Spectrometry Data Using a Consensus Approach with Estimation of Distribution Algorithms",
  journal = j-TCBB,
  volume = "8",
  number = "3",
  pages = "760--774",
  month = may # "\slash " # jun,
  year = "2011",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2010.18",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Wed May 25 15:41:56 2011",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Mitrofanova:2011:PPF,
  author = "Antonina Mitrofanova and Vladimir Pavlovic and Bud Mishra",
  title = "Prediction of Protein Functions with Gene Ontology and Interspecies Protein Homology Data",
  journal = j-TCBB,
  volume = "8",
  number = "3",
  pages = "775--784",
  month = may # "\slash " # jun,
  year = "2011",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2010.15",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
ISSN-L = "1545-5963", bibdate = "Wed May 25 15:41:56 2011", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Willson:2011:RNC, author = "Stephen J. Willson", title = "Regular Networks Can be Uniquely Constructed from Their Trees", journal = j-TCBB, volume = "8", number = "3", pages = "785--796", month = may # "\slash " # jun, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.69", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed May 25 15:41:56 2011", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Strutz:2011:SRL, author = "Tilo Strutz", title = "{$3$D} Shape Reconstruction of Loop Objects in {X}-Ray Protein Crystallography", journal = j-TCBB, volume = "8", number = "3", pages = "797--807", month = may # "\slash " # jun, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.67", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed May 25 15:41:56 2011", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Dost:2011:TFM, author = "Banu Dost and Chunlei Wu and Andrew Su and Vineet Bafna", title = "{TCLUST}: a Fast Method for Clustering Genome-Scale Expression Data", journal = j-TCBB, volume = "8", number = "3", pages = "808--818", month = may # "\slash " # jun, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.34", ISSN = "1545-5963 (print), 1557-9964 
(electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Wed May 25 15:41:56 2011",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Venkateswaran:2011:TTF,
  author = "Jayendra Gnanaskandan Venkateswaran and Bin Song and Tamer Kahveci and Christopher Jermaine",
  title = "{TRIAL}: a Tool for Finding Distant Structural Similarities",
  journal = j-TCBB,
  volume = "8",
  number = "3",
  pages = "819--831",
  month = may # "\slash " # jun,
  year = "2011",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2009.28",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Wed May 25 15:41:56 2011",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Valentini:2011:TPR,
  author = "Giorgio Valentini",
  title = "True Path Rule Hierarchical Ensembles for Genome-Wide Gene Function Prediction",
  journal = j-TCBB,
  volume = "8",
  number = "3",
  pages = "832--847",
  month = may # "\slash " # jun,
  year = "2011",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2010.38",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Wed May 25 15:41:56 2011",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Bansal:2011:NFP,
  author = "Mukul S.
Bansal and Ron Shamir",
  title = "A Note on the Fixed Parameter Tractability of the Gene-Duplication Problem",
  journal = j-TCBB,
  volume = "8",
  number = "3",
  pages = "848--850",
  month = may # "\slash " # jun,
  year = "2011",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2010.74",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Wed May 25 15:41:56 2011",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Sehgal:2011:IRD,
  author = "Aditya Kumar Sehgal and Sanmay Das and Keith Noto and Milton H. {Saier, Jr.} and Charles Elkan",
  title = "Identifying Relevant Data for a Biological Database: Handcrafted Rules versus Machine Learning",
  journal = j-TCBB,
  volume = "8",
  number = "3",
  pages = "851--857",
  month = may # "\slash " # jun,
  year = "2011",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2009.83",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Wed May 25 15:41:56 2011",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Nguyen:2011:TBU,
  author = "Minh N. Nguyen and Jacek M. Zurada and Jagath C.
Rajapakse",
  title = "Toward Better Understanding of Protein Secondary Structure: Extracting Prediction Rules",
  journal = j-TCBB,
  volume = "8",
  number = "3",
  pages = "858--864",
  month = may # "\slash " # jun,
  year = "2011",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2010.16",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Wed May 25 15:41:56 2011",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Borodovsky:2011:GEI,
  author = "Mark Borodovsky and Teresa M. Przytycka and Sanguthevar Rajasekaran and Alexander Zelikovsky",
  title = "{Guest Editors}' Introduction to the Special Section on Bioinformatics Research and Applications",
  journal = j-TCBB,
  volume = "8",
  number = "4",
  pages = "865--866",
  month = jul,
  year = "2011",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2011.92",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Mon Jun 27 10:53:41 MDT 2011",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Shibberu:2011:SAP,
  author = "Yosi Shibberu and Allen Holder",
  title = "A Spectral Approach to Protein Structure Alignment",
  journal = j-TCBB,
  volume = "8",
  number = "4",
  pages = "867--875",
  month = jul,
  year = "2011",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2011.24",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Mon Jun 27 10:53:41 MDT 2011",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract = "A new intrinsic geometry based on a spectral analysis is used to motivate methods for
aligning protein folds. The geometry is induced by the fact that a distance matrix can be scaled so that its eigenvalues are positive. We provide a mathematically rigorous development of the intrinsic geometry underlying our spectral approach and use it to motivate two alignment algorithms. The first uses eigenvalues alone and dynamic programming to quickly compute a fold alignment. Family identification results are reported for the Skolnick40 and Proteus300 data sets. The second algorithm extends our spectral method by iterating between our intrinsic geometry and the 3D geometry of a fold to make high-quality alignments.",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Ferraro:2011:ACQ,
  author = "Nicola Ferraro and Luigi Palopoli and Simona Panni and Simona E. Rombo",
  title = "Asymmetric Comparison and Querying of Biological Networks",
  journal = j-TCBB,
  volume = "8",
  number = "4",
  pages = "876--889",
  month = jul,
  year = "2011",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2011.29",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Mon Jun 27 10:53:41 MDT 2011",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract = "Comparing and querying the protein-protein interaction (PPI) networks of different organisms is important to infer knowledge about conservation across species. Known methods that perform these tasks operate symmetrically, i.e., they do not assign a distinct role to the input PPI networks. However, in most cases, the input networks are indeed distinguishable on the basis of how the corresponding organism is biologically well characterized. In this paper a new idea is developed, that is, to exploit differences in the characterization of organisms at hand in order to devise methods for comparing their PPI networks.
We use the PPI network (called Master) of the best characterized organism as a fingerprint to guide the alignment process to the second input network (called Slave), so that generated results preferably retain the structural characteristics of the Master network.",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Wiedenhoeft:2011:PMI,
  author = "John Wiedenhoeft and Roland Krause and Oliver Eulenstein",
  title = "The Plexus Model for the Inference of Ancestral Multidomain Proteins",
  journal = j-TCBB,
  volume = "8",
  number = "4",
  pages = "890--901",
  month = jul,
  year = "2011",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2011.22",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Mon Jun 27 10:53:41 MDT 2011",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract = "Interactions of protein domains control essential cellular processes. Thus, inferring the evolutionary histories of multidomain proteins in the context of their families can provide rewarding insights into protein function. However, methods to infer these histories are challenged by the complexity of macroevolutionary events. Here, we address this challenge by describing an algorithm that computes a novel network-like structure, called plexus, which represents the evolution of domains and their combinations.
Finally, we demonstrate the performance of this algorithm with empirical data sets.",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Pattengale:2011:UHP,
  author = "Nicholas Pattengale and Andre Aberer and Krister Swenson and Alexandros Stamatakis and Bernard Moret",
  title = "Uncovering Hidden Phylogenetic Consensus in Large Data Sets",
  journal = j-TCBB,
  volume = "8",
  number = "4",
  pages = "902--911",
  month = jul,
  year = "2011",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2011.28",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Mon Jun 27 10:53:41 MDT 2011",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract = "Many of the steps in phylogenetic reconstruction can be confounded by ``rogue'' taxa---taxa that cannot be placed with assurance anywhere within the tree, indeed, whose location within the tree varies with almost any choice of algorithm or parameters. Phylogenetic consensus methods, in particular, are known to suffer from this problem. In this paper, we provide a novel framework to define and identify rogue taxa. In this framework, we formulate a bicriterion optimization problem, the relative information criterion, that models the net increase in useful information present in the consensus tree when certain taxa are removed from the input data.
We also provide an effective greedy heuristic to identify a subset of rogue taxa and use this heuristic in a series of experiments, with both pathological examples from the literature and a collection of large biological data sets.",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Gysel:2011:EIC,
  author = "Rob Gysel and Daniel Gusfield",
  title = "Extensions and Improvements to the Chordal Graph Approach to the Multistate Perfect Phylogeny Problem",
  journal = j-TCBB,
  volume = "8",
  number = "4",
  pages = "912--917",
  month = jul,
  year = "2011",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2011.27",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Mon Jun 27 10:53:41 MDT 2011",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract = "The multistate perfect phylogeny problem is a classic problem in computational biology. When no perfect phylogeny exists, it is of interest to find a set of characters to remove in order to obtain a perfect phylogeny in the remaining data. This is known as the character removal problem. We show how to use chordal graphs and triangulations to solve the character removal problem for an arbitrary number of states, which was previously unsolved. We outline a preprocessing technique that speeds up the computation of the minimal separators of a graph.",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Tsai:2011:CTA,
  author = "Ming-Chi Tsai and Guy E. Blelloch and R.
Ravi and Russell Schwartz",
  title = "A Consensus Tree Approach for Reconstructing Human Evolutionary History and Detecting Population Substructure",
  journal = j-TCBB,
  volume = "8",
  number = "4",
  pages = "918--928",
  month = jul,
  year = "2011",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2011.23",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Mon Jun 27 10:53:41 MDT 2011",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract = "The random accumulation of variations in the human genome over time implicitly encodes a history of how human populations have arisen, dispersed, and intermixed since we emerged as a species. Reconstructing that history is a challenging computational and statistical problem but has important applications both to basic research and to the discovery of genotype-phenotype correlations. We present a novel approach to inferring human evolutionary history from genetic variation data.
We use the idea of consensus trees, a technique generally used to reconcile species trees from divergent gene trees, adapting it to the problem of finding robust relationships within a set of intraspecies phylogenies derived from local regions of the genome.",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Bandyopadhyay:2011:BIM,
  author = "Sanghamitra Bandyopadhyay and Malay Bhattacharyya",
  title = "A Biologically Inspired Measure for Coexpression Analysis",
  journal = j-TCBB,
  volume = "8",
  number = "4",
  pages = "929--942",
  month = jul,
  year = "2011",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2010.106",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Mon Jun 27 10:53:41 MDT 2011",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract = "Two genes are said to be coexpressed if their expression levels have a similar spatial or temporal pattern. Ever since the profiling of gene microarrays has been in progress, computational modeling of coexpression has acquired a major focus. As a result, several similarity/distance measures have evolved over time to quantify coexpression similarity/dissimilarity between gene pairs. Of these, correlation coefficient has been established to be a suitable quantifier of pairwise coexpression. In general, correlation coefficient is good for symbolizing linear dependence, but not for nonlinear dependence.
In spite of this drawback, it outperforms many other existing measures in modeling the dependency in biological data.",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Tenazinha:2011:SMM,
  author = "Nuno Tenazinha and Susana Vinga",
  title = "A Survey on Methods for Modeling and Analyzing Integrated Biological Networks",
  journal = j-TCBB,
  volume = "8",
  number = "4",
  pages = "943--958",
  month = jul,
  year = "2011",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2010.117",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Mon Jun 27 10:53:41 MDT 2011",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract = "Understanding how cellular systems build up integrated responses to their dynamically changing environment is one of the open questions in Systems Biology. Despite their intertwinement, signaling networks, gene regulation and metabolism have been frequently modeled independently in the context of well-defined subsystems. For this purpose, several mathematical formalisms have been developed according to the features of each particular network under study. Nonetheless, a deeper understanding of cellular behavior requires the integration of these various systems into a model capable of capturing how they operate as an ensemble.
With the recent advances in the ``omics'' technologies, more data is becoming available and, thus, recent efforts have been driven toward this integrated modeling approach.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Huang:2011:IHA, author = "Chao-Wen Huang and Wun-Shiun Lee and Sun-Yuan Hsieh", title = "An Improved Heuristic Algorithm for Finding Motif Signals in {DNA} Sequences", journal = j-TCBB, volume = "8", number = "4", pages = "959--975", month = jul, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.92", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 27 10:53:41 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The planted (l,d)-motif search problem is a mathematical abstraction of the DNA functional site discovery task. In this paper, we propose a heuristic algorithm that can find planted (l,d)-signals in a given set of DNA sequences. Evaluations on simulated data sets demonstrate that the proposed algorithm outperforms current widely used motif finding algorithms. 
We also report the results of experiments on real biological data sets.",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Bocker:2011:DGS,
  author = "Sebastian B{\"o}cker and Birte Kehr and Florian Rasche",
  title = "Determination of Glycan Structure from Tandem Mass Spectra",
  journal = j-TCBB,
  volume = "8",
  number = "4",
  pages = "976--986",
  month = jul,
  year = "2011",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2010.129",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Mon Jun 27 10:53:41 MDT 2011",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract = "Glycans are molecules made from simple sugars that form complex tree structures. Glycans constitute one of the most important protein modifications and identification of glycans remains a pressing problem in biology. Unfortunately, the structure of glycans is hard to predict from the genome sequence of an organism. In this paper, we consider the problem of deriving the topology of a glycan solely from tandem mass spectrometry (MS) data. We study, how to generate glycan tree candidates that sufficiently match the sample mass spectrum, avoiding the combinatorial explosion of glycan structures. Unfortunately, the resulting problem is known to be computationally hard. We present an efficient exact algorithm for this problem based on fixed-parameter algorithmics that can process a spectrum in a matter of seconds.",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Wong:2011:EAE,
  author = "Samuel S. Y. Wong and Weimin Luo and Keith C. C.
Chan", title = "{EvoMD}: An Algorithm for Evolutionary Molecular Design", journal = j-TCBB, volume = "8", number = "4", pages = "987--1003", month = jul, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.100", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 27 10:53:41 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Traditionally, Computer-Aided Molecular Design (CAMD) uses heuristic search and mathematical programming to tackle the molecular design problem. But these techniques do not handle large and nonlinear search space very well. To overcome these drawbacks, graph-based evolutionary algorithms (EAs) have been proposed to evolve molecular design by mimicking chemical reactions on the exchange of chemical bonds and components between molecules. For these EAs to perform their tasks, known molecular components, which can serve as building blocks for the molecules to be designed, and known chemical rules, which govern chemical combination between different components, have to be introduced before the evolutionary process can take place.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Merelli:2011:IBS, author = "Ivan Merelli and Paolo Cozzi and Daniele D'Agostino and Andrea Clematis and Luciano Milanesi", title = "Image-Based Surface Matching Algorithm Oriented to Structural Biology", journal = j-TCBB, volume = "8", number = "4", pages = "1004--1016", month = jul, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.21", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 27 10:53:41 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Emerging technologies for structure matching based on 
surface descriptions have demonstrated their effectiveness in many research fields. In particular, they can be successfully applied to in silico studies of structural biology. Protein activities, in fact, are related to the external characteristics of these macromolecules and the ability to match surfaces can be important to infer information about their possible functions and interactions. In this work, we present a surface-matching algorithm, based on encoding the outer morphology of proteins in images of local description, which allows us to establish point-to-point correlations among macromolecular surfaces using image-processing functions. Discarding methods relying on biological analysis of atomic structures and expensive computational approaches based on energetic studies, this algorithm can successfully be used for macromolecular recognition by employing local surface features.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{DiLena:2011:TOS, author = "Pietro {Di Lena} and Piero Fariselli and Luciano Margara and Marco Vassura and Rita Casadio", title = "Is There an Optimal Substitution Matrix for Contact Prediction with Correlated Mutations?", journal = j-TCBB, volume = "8", number = "4", pages = "1017--1028", month = jul, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.91", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 27 10:53:41 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Correlated mutations in proteins are believed to occur in order to preserve the protein functional folding through evolution. Their values can be deduced from sequence and/or structural alignments and are indicative of residue contacts in the protein three-dimensional structure. 
A correlation among pairs of residues is routinely evaluated with the Pearson correlation coefficient and the MCLACHLAN similarity matrix. In literature, there is no justification for the adoption of the MCLACHLAN instead of other substitution matrices. In this paper, we approach the problem of computing the optimal similarity matrix for contact prediction with correlated mutations, i.e., the similarity matrix that maximizes the accuracy of contact prediction with correlated mutations.",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Huber:2011:MMT,
  author = "Katharina T. Huber and Andreas Spillner and Rados{\l}aw Suchecki and Vincent Moulton",
  title = "Metrics on Multilabeled Trees: Interrelationships and Diameter Bounds",
  journal = j-TCBB,
  volume = "8",
  number = "4",
  pages = "1029--1040",
  month = jul,
  year = "2011",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2010.122",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Mon Jun 27 10:53:41 MDT 2011",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract = "Multilabeled trees or MUL-trees, for short, are trees whose leaves are labeled by elements of some nonempty finite set X such that more than one leaf may be labeled by the same element of X. This class of trees includes phylogenetic trees and tree shapes. MUL-trees arise naturally in, for example, biogeography and gene evolution studies and also in the area of phylogenetic network reconstruction. In this paper, we introduce novel metrics which may be used to compare MUL-trees, most of which generalize well-known metrics on phylogenetic trees and tree shapes.
These metrics can be used, for example, to better understand the space of MUL-trees or to help visualize collections of MUL-trees.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhao:2011:MKI, author = "Xin Zhao and Leo Wang-Kit Cheung", title = "Multiclass Kernel-Imbedded {Gaussian} Processes for Microarray Data Analysis", journal = j-TCBB, volume = "8", number = "4", pages = "1041--1053", month = jul, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.85", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 27 10:53:41 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Identifying significant differentially expressed genes of a disease can help understand the disease at the genomic level. A hierarchical statistical model named multiclass kernel-imbedded Gaussian process (mKIGP) is developed under a Bayesian framework for a multiclass classification problem using microarray gene expression data. Specifically, based on a multinomial probit regression setting, an empirically adaptive algorithm with a cascading structure is designed to find appropriate featuring kernels, to discover potentially significant genes, and to make optimal tumor/cancer class predictions. A Gibbs sampler is adopted as the core of the algorithm to perform Bayesian inferences. 
A prescreening procedure is implemented to alleviate the computational complexity.",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Zhang:2011:PTN,
  author = "Peng Zhang and Houqiang Li and Honghui Wang and Stephen Wong and Xiaobo Zhou",
  title = "Peak Tree: a New Tool for Multiscale Hierarchical Representation and Peak Detection of Mass Spectrometry Data",
  journal = j-TCBB,
  volume = "8",
  number = "4",
  pages = "1054--1066",
  month = jul,
  year = "2011",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2009.56",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Mon Jun 27 10:53:41 MDT 2011",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract = "Peak detection is one of the most important steps in mass spectrometry (MS) analysis. However, the detection result is greatly affected by severe spectrum variations. Unfortunately, most current peak detection methods are neither flexible enough to revise false detection results nor robust enough to resist spectrum variations. To improve flexibility, we introduce peak tree to represent the peak information in MS spectra. Each tree node is a peak judgment on a range of scales, and each tree decomposition, as a set of nodes, is a candidate peak detection result.
To improve robustness, we combine peak detection and common peak alignment into a closed-loop framework, which finds the optimal decomposition via both peak intensity and common peak information.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{El-Manzalawy:2011:PMI, author = "Yasser El-Manzalawy and Drena Dobbs and Vasant Honavar", title = "Predicting {MHC-II} Binding Affinity Using Multiple Instance Regression", journal = j-TCBB, volume = "8", number = "4", pages = "1067--1079", month = jul, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.94", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 27 10:53:41 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Reliably predicting the ability of antigen peptides to bind to major histocompatibility complex class II (MHC-II) molecules is an essential step in developing new vaccines. Uncovering the amino acid sequence correlates of the binding affinity of MHC-II binding peptides is important for understanding pathogenesis and immune response. The task of predicting MHC-II binding peptides is complicated by the significant variability in their length. Most existing computational methods for predicting MHC-II binding peptides focus on identifying a nine amino acids core region in each binding peptide. We formulate the problems of qualitatively and quantitatively predicting flexible length MHC-II peptides as multiple instance learning and multiple instance regression problems, respectively. Based on this formulation, we introduce MHCMIR, a novel method for predicting MHC-II binding affinity using multiple instance regression. 
We present results of experiments using several benchmark data sets that show that MHCMIR is competitive with the state-of-the-art methods for predicting MHC-II binding peptides. An online web server that implements the MHCMIR method for MHC-II binding affinity prediction is freely accessible at \path=http://ailab.cs.iastate.edu/mhcmir/=.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Yang:2011:RFS, author = "Feng Yang and K. Z. Mao", title = "Robust Feature Selection for Microarray Data Based on Multicriterion Fusion", journal = j-TCBB, volume = "8", number = "4", pages = "1080--1092", month = jul, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.103", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 27 10:53:41 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Feature selection often aims to select a compact feature subset to build a pattern classifier with reduced complexity, so as to achieve improved classification performance. From the perspective of pattern analysis, producing stable or robust solution is also a desired property of a feature selection algorithm. However, the issue of robustness is often overlooked in feature selection. In this study, we analyze the robustness issue existing in feature selection for high-dimensional and small-sized gene-expression data, and propose to improve robustness of feature selection algorithm by using multiple feature selection evaluation criteria. 
Based on this idea, a multicriterion fusion-based recursive feature elimination (MCF-RFE) algorithm is developed with the goal of improving both classification performance and stability of feature selection results.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Tino:2011:SCG, author = "Peter Ti{\v{n}}o and Hongya Zhao and Hong Yan", title = "Searching for Coexpressed Genes in Three-Color {cDNA} Microarray Data Using a Probabilistic Model-Based {Hough Transform}", journal = j-TCBB, volume = "8", number = "4", pages = "1093--1107", month = jul, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.120", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 27 10:53:41 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The effects of a drug on the genomic scale can be assessed in a three-color cDNA microarray with the three color intensities represented through the so-called hexaMplot. In our recent study, we have shown that the Hough Transform (HT) applied to the hexaMplot can be used to detect groups of coexpressed genes in the normal-disease-drug samples. 
However, the standard HT is not well suited for the purpose because (1) the assayed genes need first to be hard-partitioned into equally and differentially expressed genes, with HT ignoring possible information in the former group; (2) the hexaMplot coordinates are negatively correlated and there is no direct way of expressing this in the standard HT and (3) it is not clear how to quantify the association of coexpressed genes with the line along which they cluster.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wang:2011:IMP, author = "Li-San Wang and Jim Leebens-Mack and P. Kerr Wall and Kevin Beckmann and Claude W. dePamphilis and Tandy Warnow", title = "The Impact of Multiple Protein Sequence Alignment on Phylogenetic Estimation", journal = j-TCBB, volume = "8", number = "4", pages = "1108--1119", month = jul, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2009.68", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 27 10:53:41 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Multiple sequence alignment is typically the first step in estimating phylogenetic trees, with the assumption being that as alignments improve, so will phylogenetic reconstructions. Over the last decade or so, new multiple sequence alignment methods have been developed to improve comparative analyses of protein structure, but these new methods have not been typically used in phylogenetic analyses. In this paper, we report on a simulation study that we performed to evaluate the consequences of using these new multiple sequence alignment methods in terms of the resultant phylogenetic reconstruction. 
We find that while alignment accuracy is positively correlated with phylogenetic accuracy, the amount of improvement in phylogenetic estimation that results from an improved alignment can range from quite small to substantial.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Sadjad:2011:TRS, author = "Bashir Sadjad and Zsolt Zsoldos", title = "Toward a Robust Search Method for the Protein--Drug Docking Problem", journal = j-TCBB, volume = "8", number = "4", pages = "1120--1133", month = jul, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.70", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 27 10:53:41 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Predicting the binding mode(s) of a drug molecule to a target receptor is pivotal in structure-based rational drug design. In contrast to most approaches to solve this problem, the idea in this paper is to analyze the search problem from a computational perspective. By building on top of an existing docking tool, new methods are proposed and relevant computational results are proven. These methods and results are applicable for other place-and-join frameworks as well. A fast approximation scheme for the docking of rigid fragments is described that guarantees certain geometric approximation factors. 
It is also demonstrated that this can be translated into an energy approximation for simple scoring functions.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Chen:2011:ARD, author = "Yang Chen and Jinglu Hu", title = "Accurate Reconstruction for {DNA} Sequencing by Hybridization Based on a Constructive Heuristic", journal = j-TCBB, volume = "8", number = "4", pages = "1134--1140", month = jul, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.89", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 27 10:53:41 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Sequencing by hybridization is a promising cost-effective technology for high-throughput DNA sequencing via microarray chips. However, due to the effects of spectrum errors rooted in experimental conditions, an accurate and fast reconstruction of original sequences has become a challenging problem. In the last decade, a variety of analyses and designs have been tried to overcome this problem, where different strategies have different trade-offs in speed and accuracy. 
Motivated by the idea that the errors could be identified by analyzing the interrelation of spectrum elements, this paper presents a constructive heuristic algorithm, featuring an accurate reconstruction guided by a set of well-defined criteria and rules.",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Guillemot:2011:CSM,
  author = "Sylvain Guillemot and Jesper Jansson and Wing-Kin Sung",
  title = "Computing a Smallest Multilabeled Phylogenetic Tree from Rooted Triplets",
  journal = j-TCBB,
  volume = "8",
  number = "4",
  pages = "1141--1147",
  month = jul,
  year = "2011",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2010.77",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Mon Jun 27 10:53:41 MDT 2011",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Peters:2011:TSC,
  author = "Tim Peters and David W. Bulger and To-ha Loi and Jean Yee Hwa Yang and David Ma",
  title = "Two-Step Cross-Entropy Feature Selection for Microarrays --- Power Through Complementarity",
  journal = j-TCBB,
  volume = "8",
  number = "4",
  pages = "1148--1151",
  month = jul,
  year = "2011",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2011.30",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Mon Jun 27 10:53:41 MDT 2011",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract = "Current feature selection methods for supervised classification of tissue samples from microarray data generally fail to exploit complementary discriminatory power that can be found in sets of features. Using a feature selection method with the computational architecture of the cross-entropy method, including an additional preliminary step ensuring a lower bound on the number of times any feature is considered, we show when testing on a human lymph node data set that there are a significant number of genes that perform well when their complementary power is assessed, but ``pass under the radar'' of popular feature selection methods that only assess genes individually on a given classification tool.",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
  xxnote = "Check: the abstract as received carried two [CHECK END OF SENTENCE] placeholders, one after ``sets of features'' and one after ``the cross-entropy method''; they were removed from the abstract text. Verify both places against the published abstract.",
}

@Article{Zhu:2011:GMA,
  author = "Dongxiao Zhu and Lipi Acharya and Hui Zhang",
  title = "A Generalized Multivariate Approach to Pattern Discovery from Replicated and Incomplete Genome-Wide Measurements",
  journal = j-TCBB,
  volume = "8",
  number = "5",
  pages = "1153--1169",
  month = sep,
  year = "2011",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2010.102",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Wed Aug 17 09:10:05 MDT 2011",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Chang:2011:NKD,
  author = "Rui Chang and Robert Shoemaker and Wei Wang",
  title = "A Novel Knowledge-Driven Systems Biology Approach for Phenotype Prediction upon Genetic Intervention",
  journal = j-TCBB,
  volume = "8",
  number = "5",
  pages = "1170--1182",
  month = sep,
  year = "2011",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2011.18",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Wed Aug 17 09:10:05 MDT 2011",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Irurozki:2011:PPH, author = "Ekhine Irurozki and Borja Calvo and Jose A. Lozano", title = "A Preprocessing Procedure for Haplotype Inference by Pure Parsimony", journal = j-TCBB, volume = "8", number = "5", pages = "1183--1195", month = sep, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.125", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Aug 17 09:10:05 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Battagliero:2011:EAA, author = "Simone Battagliero and Giuseppe Puglia and Saverio Vicario and Francesco Rubino and Gaetano Scioscia and Pietro Leo", title = "An Efficient Algorithm for Approximating Geodesic Distances in Tree Space", journal = j-TCBB, volume = "8", number = "5", pages = "1196--1207", month = sep, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.121", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Aug 17 09:10:05 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{John:2011:CCP, author = "David J. John and Jacquelyn S. Fetrow and James L. 
Norris", title = "Continuous Cotemporal Probabilistic Modeling of Systems Biology Networks from Sparse Data", journal = j-TCBB, volume = "8", number = "5", pages = "1208--1222", month = sep, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.95", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Aug 17 09:10:05 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Guziolowski:2011:DLR, author = "Carito Guziolowski and Sylvain Blachon and Tatiana Baumuratova and Gautier Stoll and Ovidiu Radulescu and Anne Siegel", title = "Designing Logical Rules to Model the Response of Biomolecular Networks with Complex Interactions: An Application to Cancer Modeling", journal = j-TCBB, volume = "8", number = "5", pages = "1223--1234", month = sep, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.71", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Aug 17 09:10:05 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Sahu:2011:ELH, author = "Sitanshu Sekhar Sahu and Ganapati Panda", title = "Efficient Localization of Hot Spots in Proteins Using a Novel {$S$}-Transform Based Filtering Approach", journal = j-TCBB, volume = "8", number = "5", pages = "1235--1246", month = sep, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.109", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Aug 17 09:10:05 MDT 2011", bibsource = "http://portal.acm.org/; 
https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Flores:2011:FFM, author = "Samuel Coulbourn Flores and Michael Sherman and Christopher M. Bruns and Peter Eastman and Russ B. Altman", title = "Fast Flexible Modeling of {RNA} Structure Using Internal Coordinates", journal = j-TCBB, volume = "8", number = "5", pages = "1247--1257", month = sep, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.104", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Aug 17 09:10:05 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wang:2011:IAF, author = "Biing-Feng Wang and Chien-Hsin Lin", title = "Improved Algorithms for Finding Gene Teams and Constructing Gene Team Trees", journal = j-TCBB, volume = "8", number = "5", pages = "1258--1272", month = sep, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.127", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Aug 17 09:10:05 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zheng:2011:MBS, author = "Chun-Hou Zheng and Lei Zhang and To-Yee Ng and Chi Keung Shiu and De-Shuang Huang", title = "Metasample-Based Sparse Representation for Tumor Classification", journal = j-TCBB, volume = "8", number = "5", pages = "1273--1282", month = sep, year = "2011", CODEN = "ITCBCY", DOI = 
"https://doi.org/10.1109/TCBB.2011.20", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Aug 17 09:10:05 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Peng:2011:MSA, author = "Qian Peng and Andrew D. Smith", title = "Multiple Sequence Assembly from Reads Alignable to a Common Reference Genome", journal = j-TCBB, volume = "8", number = "5", pages = "1283--1295", month = sep, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.107", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Aug 17 09:10:05 MDT 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Betzler:2011:PAF, author = "Nadja Betzler and Rene van Bevern and Michael R. 
Fellows and Christian Komusiewicz and Rolf Niedermeier",
  title =        "Parameterized Algorithmics for Finding Connected Motifs in Biological Networks",
  journal =      j-TCBB,
  volume =       "8",
  number =       "5",
  pages =        "1296--1308",
  month =        sep,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.19",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Aug 17 09:10:05 MDT 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Kim:2011:PMS,
  author =       "Jong Kyoung Kim and Seungjin Choi",
  title =        "Probabilistic Models for Semisupervised Discriminative Motif Discovery in {DNA} Sequences",
  journal =      j-TCBB,
  volume =       "8",
  number =       "5",
  pages =        "1309--1317",
  month =        sep,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2010.84",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Aug 17 09:10:05 MDT 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Feijao:2011:SBL,
  author =       "Pedro Feijao and Joao Meidanis",
  title =        "{SCJ}: a Breakpoint-Like Distance that Simplifies Several Rearrangement Problems",
  journal =      j-TCBB,
  volume =       "8",
  number =       "5",
  pages =        "1318--1329",
  month =        sep,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.34",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Aug 17 09:10:05 MDT 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Mernberger:2011:SSG,
  author =       "Marco Mernberger and Gerhard Klebe and Eyke H{\"u}llermeier",
  title =        "{SEGA}: Semiglobal Graph Alignment for Structure-Based Protein Comparison",
  journal =      j-TCBB,
  volume =       "8",
  number =       "5",
  pages =        "1330--1343",
  month =        sep,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.35",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Aug 17 09:10:05 MDT 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Boyen:2011:SGM,
  author =       "Peter Boyen and Dries {Van Dyck} and Frank Neven and Roeland C. H. J. van Ham and Aalt D. J. van Dijk",
  title =        "{SLIDER}: a Generic Metaheuristic for the Discovery of Correlated Motifs in Protein-Protein Interaction Networks",
  journal =      j-TCBB,
  volume =       "8",
  number =       "5",
  pages =        "1344--1357",
  month =        sep,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.17",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Aug 17 09:10:05 MDT 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Buhrman:2011:SMR,
  author =       "Harry Buhrman and Peter T. S. van der Gulik and Steven M. Kelk and Wouter M.
Koolen and Leen Stougie",
  title =        "Some Mathematical Refinements Concerning Error Minimization in the Genetic Code",
  journal =      j-TCBB,
  volume =       "8",
  number =       "5",
  pages =        "1358--1372",
  month =        sep,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.40",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Aug 17 09:10:05 MDT 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Wong:2011:UKA,
  author =       "William W. L. Wong and Forbes J. Burkowski",
  title =        "Using Kernel Alignment to Select Features of Molecular Descriptors in a {QSAR} Study",
  journal =      j-TCBB,
  volume =       "8",
  number =       "5",
  pages =        "1373--1384",
  month =        sep,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.31",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Aug 17 09:10:05 MDT 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Muselli:2011:MMV,
  author =       "Marco Muselli and Alberto Bertoni and Marco Frasca and Alessandro Beghini and Francesca Ruffino and Giorgio Valentini",
  title =        "A Mathematical Model for the Validation of Gene Selection Methods",
  journal =      j-TCBB,
  volume =       "8",
  number =       "5",
  pages =        "1385--1392",
  month =        sep,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2010.83",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Aug 17 09:10:05 MDT 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Dubrova:2011:SBA,
  author =       "Elena Dubrova and Maxim Teslenko",
  title =        "A {SAT}-Based Algorithm for Finding Attractors in Synchronous {Boolean} Networks",
  journal =      j-TCBB,
  volume =       "8",
  number =       "5",
  pages =        "1393--1399",
  month =        sep,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2010.20",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Aug 17 09:10:05 MDT 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Chen:2011:FEA,
  author =       "Zhi-Zhong Chen and Lusheng Wang",
  title =        "Fast Exact Algorithms for the Closest String and Substring Problems with Application to the Planted {$ (L, d) $}-Motif Model",
  journal =      j-TCBB,
  volume =       "8",
  number =       "5",
  pages =        "1400--1410",
  month =        sep,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.21",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Aug 17 09:10:05 MDT 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Grusea:2011:DNC,
  author =       "Simona Grusea",
  title =        "On the Distribution of the Number of Cycles in the Breakpoint Graph of a Random Signed Permutation",
  journal =      j-TCBB,
  volume =       "8",
  number =       "5",
  pages =        "1411--1416",
  month =        sep,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2010.123",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Aug 17 09:10:05
MDT 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Befekadu:2011:PMR,
  author =       "Getachew K. Befekadu and Mahlet G. Tadesse and Tsung-Heng Tsai and Habtom W. Ressom",
  title =        "Probabilistic Mixture Regression Models for Alignment of {LC-MS} Data",
  journal =      j-TCBB,
  volume =       "8",
  number =       "5",
  pages =        "1417--1424",
  month =        sep,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2010.88",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Aug 17 09:10:05 MDT 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Magni:2011:SPI,
  author =       "Paolo Magni and Angela Simeone and Sandra Healy and Antonella Isacchi and Roberta Bosotti",
  title =        "Summarizing Probe Intensities of {Affymetrix GeneChip 3'} Expression Arrays Taking into Account Day-to-Day Variability",
  journal =      j-TCBB,
  volume =       "8",
  number =       "5",
  pages =        "1425--1430",
  month =        sep,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2010.82",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Aug 17 09:10:05 MDT 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Aharoni:2011:QPD,
  author =       "Ehud Aharoni and Hani Neuvirth and Saharon Rosset",
  title =        "The Quality Preserving Database: a Computational Framework for Encouraging Collaboration, Enhancing
Power and Controlling False Discovery",
  journal =      j-TCBB,
  volume =       "8",
  number =       "5",
  pages =        "1431--1437",
  month =        sep,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2010.105",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Aug 17 09:10:05 MDT 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Axenopoulos:2011:SDF,
  author =       "Apostolos Axenopoulos and Petros Daras and Georgios Papadopoulos and Elias Houstis",
  title =        "A Shape Descriptor for Fast Complementarity Matching in Molecular Docking",
  journal =      j-TCBB,
  volume =       "8",
  number =       "6",
  pages =        "1441--1457",
  month =        nov,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.72",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sun Nov 6 06:45:50 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Zhao:2011:ASM,
  author =       "Wenqi Zhao and Guoliang Xu and Chandrajit L.
Bajaj",
  title =        "An Algebraic Spline Model of Molecular Surfaces for Energetic Computations",
  journal =      j-TCBB,
  volume =       "8",
  number =       "6",
  pages =        "1458--1467",
  month =        nov,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.81",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sun Nov 6 06:45:50 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Nebel:2011:AFE,
  author =       "Markus E. Nebel and Anika Scheid",
  title =        "Analysis of the Free Energy in a Stochastic {RNA} Secondary Structure Model",
  journal =      j-TCBB,
  volume =       "8",
  number =       "6",
  pages =        "1468--1482",
  month =        nov,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2010.126",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sun Nov 6 06:45:50 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Zhao:2011:ASB,
  author =       "Liang Zhao and Limsoon Wong and Jinyan Li",
  title =        "Antibody-Specified {B}-Cell Epitope Prediction in Line with the Principle of Context-Awareness",
  journal =      j-TCBB,
  volume =       "8",
  number =       "6",
  pages =        "1483--1494",
  month =        nov,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.49",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sun Nov 6 06:45:50 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Cobanoglu:2011:CGU,
  author =       "Murat Can Cobanoglu and Yucel Saygin and Ugur Sezerman",
  title =        "Classification of {GPCRs} Using Family Specific Motifs",
  journal =      j-TCBB,
  volume =       "8",
  number =       "6",
  pages =        "1495--1508",
  month =        nov,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2010.101",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sun Nov 6 06:45:50 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Li:2011:CED,
  author =       "Qi Li and Chandra Kambhamettu",
  title =        "Contour Extraction of \bioname{Drosophila} Embryos",
  journal =      j-TCBB,
  volume =       "8",
  number =       "6",
  pages =        "1509--1521",
  month =        nov,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.37",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sun Nov 6 06:45:50 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Oh:2011:FKD,
  author =       "Jung Hun Oh and Jean Gao",
  title =        "Fast Kernel Discriminant Analysis for Classification of Liver Cancer Mass Spectra",
  journal =      j-TCBB,
  volume =       "8",
  number =       "6",
  pages =        "1522--1534",
  month =        nov,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2010.42",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sun Nov 6 06:45:50 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational
Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Chen:2011:FAP,
  author =       "Qingfeng Chen and Yi-Ping Phoebe Chen",
  title =        "Function Annotation for Pseudoknot Using Structure Similarity",
  journal =      j-TCBB,
  volume =       "8",
  number =       "6",
  pages =        "1535--1544",
  month =        nov,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.50",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sun Nov 6 06:45:50 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Chalkidis:2011:HPH,
  author =       "Georgios Chalkidis and Masao Nagasaki and Satoru Miyano",
  title =        "High Performance Hybrid Functional {Petri} Net Simulations of Biological Pathway Models on {CUDA}",
  journal =      j-TCBB,
  volume =       "8",
  number =       "6",
  pages =        "1545--1556",
  month =        nov,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2010.118",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sun Nov 6 06:45:50 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Li:2011:HHT,
  author =       "Helong Li and Sam Kwong and Lihua Yang and Daren Huang and Dongping Xiao",
  title =        "{Hilbert--Huang Transform} for Analysis of Heart Rate Variability in Cardiac Health",
  journal =      j-TCBB,
  volume =       "8",
  number =       "6",
  pages =        "1557--1567",
  month =        nov,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.43",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sun Nov 6 06:45:50 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Sheng:2011:IAG,
  author =       "Jinhua Sheng and Hong-Wen Deng and Vince Calhoun and Yu-Ping Wang",
  title =        "Integrated Analysis of Gene Expression and Copy Number Data on Gene Shaving Using Independent Component Analysis",
  journal =      j-TCBB,
  volume =       "8",
  number =       "6",
  pages =        "1568--1579",
  month =        nov,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.71",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sun Nov 6 06:45:50 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Chen:2011:MIS,
  author =       "Carla C. M. Chen and Holger Schwender and Jonathan Keith and Robin Nunkesser and Kerrie Mengersen and Paula Macrossan",
  title =        "Methods for Identifying {SNP} Interactions: a Review on Variations of Logic Regression, Random Forest and {Bayesian} Logistic Regression",
  journal =      j-TCBB,
  volume =       "8",
  number =       "6",
  pages =        "1580--1591",
  month =        nov,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.46",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sun Nov 6 06:45:50 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Zheng:2011:MPD,
  author =       "Chun-Hou Zheng and Lei Zhang and Vincent To-Yee Ng and Chi Keung Shiu and De-Shuang
Huang",
  title =        "Molecular Pattern Discovery Based on Penalized Matrix Decomposition",
  journal =      j-TCBB,
  volume =       "8",
  number =       "6",
  pages =        "1592--1603",
  month =        nov,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.79",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sun Nov 6 06:45:50 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{LeFaucheur:2011:NCS,
  author =       "Xavier {Le Faucheur} and Eli Hershkovits and Rina Tannenbaum and Allen Tannenbaum",
  title =        "Nonparametric Clustering for Studying {RNA} Conformations",
  journal =      j-TCBB,
  volume =       "8",
  number =       "6",
  pages =        "1604--1619",
  month =        nov,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2010.128",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sun Nov 6 06:45:50 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Dotu:2011:LPS,
  author =       "Ivan Dotu and Manuel Cebrian and Pascal {Van Hentenryck} and Peter Clote",
  title =        "On Lattice Protein Structure Prediction Revisited",
  journal =      j-TCBB,
  volume =       "8",
  number =       "6",
  pages =        "1620--1632",
  month =        nov,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.41",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sun Nov 6 06:45:50 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Li:2011:RUP,
  author =       "Hong-Dong Li and Yi-Zeng Liang and Qing-Song Xu and Dong-Sheng Cao and Bin-Bin Tan and Bai-Chuan Deng and Chen-Chen Lin",
  title =        "Recipe for Uncovering Predictive Genes Using Support Vector Machines Based on Model Population Analysis",
  journal =      j-TCBB,
  volume =       "8",
  number =       "6",
  pages =        "1633--1641",
  month =        nov,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.36",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sun Nov 6 06:45:50 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Hafemeister:2011:SOP,
  author =       "Christoph Hafemeister and Roland Krause and Alexander Schliep",
  title =        "Selecting Oligonucleotide Probes for Whole-Genome Tiling Arrays with a Cross-Hybridization Potential",
  journal =      j-TCBB,
  volume =       "8",
  number =       "6",
  pages =        "1642--1652",
  month =        nov,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.39",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sun Nov 6 06:45:50 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Fober:2011:SAL,
  author =       "Thomas Fober and Serghei Glinca and Gerhard Klebe and Eyke H{\"u}llermeier",
  title =        "Superposition and Alignment of Labeled Point Clouds",
  journal =      j-TCBB,
  volume =       "8",
  number =       "6",
  pages =        "1653--1666",
  month =        nov,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.42",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sun Nov 6 06:45:50 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Tuller:2011:CEI,
  author =       "Tamir Tuller and Elchanan Mossel",
  title =        "Co-evolution Is Incompatible with the {Markov} Assumption in Phylogenetics",
  journal =      j-TCBB,
  volume =       "8",
  number =       "6",
  pages =        "1667--1670",
  month =        nov,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2010.124",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sun Nov 6 06:45:50 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Sun:2011:CFS,
  author =       "Bing-Yu Sun and Zhi-Hua Zhu and Jiuyong Li and Bin Linghu",
  title =        "Combined Feature Selection and Cancer Prognosis Using Support Vector Machine Regression",
  journal =      j-TCBB,
  volume =       "8",
  number =       "6",
  pages =        "1671--1677",
  month =        nov,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2010.119",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sun Nov 6 06:45:50 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Liu:2011:CBA,
  author =       "Weiguo Liu and Bertil Schmidt and Wolfgang M{\"u}ller-Wittig",
  title =        "{CUDA-BLASTP}: Accelerating {BLASTP} on {CUDA}-Enabled Graphics Hardware",
  journal =      j-TCBB,
  volume =       "8",
  number =       "6",
  pages =        "1678--1684",
  month =        nov,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.33",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sun Nov 6 06:45:50 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Zhang:2011:GTS,
  author =       "Louxin Zhang",
  title =        "From Gene Trees to Species Trees {II}: Species Tree Inference by Minimizing Deep Coalescence Events",
  journal =      j-TCBB,
  volume =       "8",
  number =       "6",
  pages =        "1685--1691",
  month =        nov,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.83",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sun Nov 6 06:45:50 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Fellows:2011:HIC,
  author =       "Michael R. Fellows and Tzvika Hartman and Danny Hermelin and Gad M.
Landau and Frances Rosamond and Liat Rozenberg",
  title =        "Haplotype Inference Constrained by Plausible Haplotype Data",
  journal =      j-TCBB,
  volume =       "8",
  number =       "6",
  pages =        "1692--1699",
  month =        nov,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2010.72",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sun Nov 6 06:45:50 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Ambroise:2011:IRP,
  author =       "Jerome Ambroise and Joachim Giard and Jean-Luc Gala and Benoit Macq",
  title =        "Identification of Relevant Properties for Epitopes Detection Using a Regression Model",
  journal =      j-TCBB,
  volume =       "8",
  number =       "6",
  pages =        "1700--1707",
  month =        nov,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.77",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sun Nov 6 06:45:50 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Wang:2011:ICA,
  author =       "Qingguo Wang and Yi Shang and Dong Xu",
  title =        "Improving a Consensus Approach for Protein Structure Selection by Removing Redundancy",
  journal =      j-TCBB,
  volume =       "8",
  number =       "6",
  pages =        "1708--1715",
  month =        nov,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.75",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sun Nov 6 06:45:50 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on
Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Poleksic:2011:OWU,
  author =       "Aleksandar Poleksic",
  title =        "Optimizing a Widely Used Protein Structure Alignment Measure in Expected Polynomial Time",
  journal =      j-TCBB,
  volume =       "8",
  number =       "6",
  pages =        "1716--1720",
  month =        nov,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.122",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sun Nov 6 06:45:50 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Rodrigues:2011:PCE,
  author =       "Thiago de Souza Rodrigues and Fernanda Caldas Cardoso and Santuza Maria Ribeiro Teixeira and Sergio Costa Oliveira and Antonio Padua Braga",
  title =        "Protein Classification with Extended-Sequence Coding by Sliding Window",
  journal =      j-TCBB,
  volume =       "8",
  number =       "6",
  pages =        "1721--1726",
  month =        nov,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.78",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sun Nov 6 06:45:50 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{DeBlasio:2012:MEM,
  author =       "Daniel DeBlasio and Jocelyne Bruand and Shaojie Zhang",
  title =        "A Memory Efficient Method for Structure-Based {RNA} Multiple Alignment",
  journal =      j-TCBB,
  volume =       "9",
  number =       "1",
  pages =        "1--11",
  month =        jan,
  year =         "2012",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.86",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Dec 15 08:25:50 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Pirola:2012:EAH,
  author =       "Yuri Pirola and Paola Bonizzoni and Tao Jiang",
  title =        "An Efficient Algorithm for Haplotype Inference on Pedigrees with Recombinations and Mutations",
  journal =      j-TCBB,
  volume =       "9",
  number =       "1",
  pages =        "12--25",
  month =        jan,
  year =         "2012",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.51",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Dec 15 08:25:50 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Doyon:2012:EME,
  author =       "Jean-Philippe Doyon and Sylvie Hamel and Cedric Chauve",
  title =        "An Efficient Method for Exploring the Space of Gene Tree\slash Species Tree Reconciliations in a Probabilistic Framework",
  journal =      j-TCBB,
  volume =       "9",
  number =       "1",
  pages =        "26--39",
  month =        jan,
  year =         "2012",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.64",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Dec 15 08:25:50 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Wang:2012:EMM,
  author =       "Chong Wang and Peter Beyerlein and Heike Pospisil and Antje Krause and Chris Nugent and Werner Dubitzky",
  title =        "An Efficient Method for Modeling Kinetic Behavior of Channel
Proteins in Cardiomyocytes",
  journal =      j-TCBB,
  volume =       "9",
  number =       "1",
  pages =        "40--51",
  month =        jan,
  year =         "2012",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.84",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Dec 15 08:25:50 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Vasic:2012:ITA,
  author =       "Bane Vasic and Vida Ravanmehr and Anantha Raman Krishnan",
  title =        "An Information Theoretic Approach to Constructing Robust {Boolean} Gene Regulatory Networks",
  journal =      j-TCBB,
  volume =       "9",
  number =       "1",
  pages =        "52--65",
  month =        jan,
  year =         "2012",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.61",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Dec 15 08:25:50 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Piraveenan:2012:AMD,
  author =       "Mahendra Piraveenan and Mikhail Prokopenko and Albert Zomaya",
  title =        "Assortative Mixing in Directed Biological Networks",
  journal =      j-TCBB,
  volume =       "9",
  number =       "1",
  pages =        "66--78",
  month =        jan,
  year =         "2012",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2010.80",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Dec 15 08:25:50 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}
@Article{Chan:2012:CVM, author = "Raymond H. Chan and Tony H. Chan and Hau Man Yeung and Roger Wei Wang", title = "Composition Vector Method Based on Maximum Entropy Principle for Sequence Comparison", journal = j-TCBB, volume = "9", number = "1", pages = "79--87", month = jan, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.45", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Dec 15 08:25:50 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Quevedo:2012:DLP, author = "Jose R. Quevedo and Antonio Bahamonde and Miguel Perez-Enciso and Oscar Luaces", title = "Disease Liability Prediction from Large Scale Genotyping Data Using Classifiers with a Reject Option", journal = j-TCBB, volume = "9", number = "1", pages = "88--97", month = jan, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.44", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Dec 15 08:25:50 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Li:2012:DGE, author = "Ying-Xin Li and Shuiwang Ji and Sudhir Kumar and Jieping Ye and Zhi-Hua Zhou", title = "\bioname{Drosophila} Gene Expression Pattern Annotation through Multi-Instance Multi-Label Learning", journal = j-TCBB, volume = "9", number = "1", pages = "98--112", month = jan, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.73", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Dec 15 08:25:50 MST 2011", bibsource = 
"http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Paoletti:2012:INC, author = "David R. Paoletti and Dan E. Krane and Michael L. Raymer and Travis E. Doom", title = "Inferring the Number of Contributors to Mixed {DNA} Profiles", journal = j-TCBB, volume = "9", number = "1", pages = "113--122", month = jan, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.76", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Dec 15 08:25:50 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Qian:2012:IGR, author = "Xiaoning Qian and Edward R. 
Dougherty", title = "Intervention in Gene Regulatory Networks via Phenotypically Constrained Control Policies Based on Long-Run Behavior", journal = j-TCBB, volume = "9", number = "1", pages = "123--136", month = jan, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.107", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Dec 15 08:25:50 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Kuruppu:2012:IDC, author = "Shanika Kuruppu and Bryan Beresford-Smith and Thomas Conway and Justin Zobel", title = "Iterative Dictionary Construction for Compression of Large {DNA} Data Sets", journal = j-TCBB, volume = "9", number = "1", pages = "137--149", month = jan, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.82", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Dec 15 08:25:50 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Bogdanowicz:2012:MSD, author = "Damian Bogdanowicz and Krzysztof Giaro", title = "Matching Split Distance for Unrooted Binary Phylogenetic Trees", journal = j-TCBB, volume = "9", number = "1", pages = "150--160", month = jan, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.48", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Dec 15 08:25:50 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on 
Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wong:2012:MEA, author = "Thomas K. F. Wong and Y. S. Chiu and T. W. Lam and S. M. Yiu", title = "Memory Efficient Algorithms for Structural Alignment of {RNAs} with Pseudoknots", journal = j-TCBB, volume = "9", number = "1", pages = "161--168", month = jan, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.66", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Dec 15 08:25:50 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Mahoney:2012:MOB, author = "Arthur W. Mahoney and Gregory J. Podgorski and Nicholas S. Flann", title = "Multiobjective Optimization Based-Approach for Discovering Novel Cancer Therapies", journal = j-TCBB, volume = "9", number = "1", pages = "169--184", month = jan, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2010.39", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Dec 15 08:25:50 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Sun:2012:PEU, author = "Jianyong Sun and Jonathan M. 
Garibaldi and Charlie Hodgman", title = "Parameter Estimation Using Metaheuristics in Systems Biology: a Comprehensive Review", journal = j-TCBB, volume = "9", number = "1", pages = "185--202", month = jan, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.63", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Dec 15 08:25:50 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Passerini:2012:PMB, author = "Andrea Passerini and Marco Lippi and Paolo Frasconi", title = "Predicting Metal-Binding Sites from Protein Sequence", journal = j-TCBB, volume = "9", number = "1", pages = "203--213", month = jan, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.94", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Dec 15 08:25:50 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Bokhari:2012:RNE, author = "Shahid H. Bokhari and Laura W. Pomeroy and Daniel A. 
Janies", title = "Reassortment Networks and the Evolution of Pandemic {H1N1} Swine-Origin Influenza", journal = j-TCBB, volume = "9", number = "1", pages = "214--227", month = jan, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.95", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Dec 15 08:25:50 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ma:2012:RNH, author = "Lingyu Ma and Marco Reisert and Hans Burkhardt", title = "{RENNSH}: a Novel $ \alpha $-Helix Identification Approach for Intermediate Resolution Electron Density Maps", journal = j-TCBB, volume = "9", number = "1", pages = "228--239", month = jan, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.52", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Dec 15 08:25:50 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Li:2012:RSH, author = "Shuai Cheng Li and Dongbo Bu and Ming Li", title = "Residues with Similar Hexagon Neighborhoods Share Similar Side-Chain Conformations", journal = j-TCBB, volume = "9", number = "1", pages = "240--248", month = jan, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.74", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Dec 15 08:25:50 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and 
Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Sun:2012:SSM, author = "Hong Sun and Ahmet Sacan and Hakan Ferhatosmanoglu and Yusu Wang", title = "{Smolign}: a Spatial Motifs-Based Protein Multiple Structural Alignment Method", journal = j-TCBB, volume = "9", number = "1", pages = "249--261", month = jan, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.67", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Dec 15 08:25:50 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Yu:2012:SGS, author = "Lei Yu and Yue Han and Michael E. Berens", title = "Stable Gene Selection from Microarray Data via Sample Weighting", journal = j-TCBB, volume = "9", number = "1", pages = "262--272", month = jan, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.47", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Dec 15 08:25:50 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Bonet:2012:CFM, author = "Maria Luisa Bonet and Simone Linz and Katherine {St. 
John}", title = "The Complexity of Finding Multiple Solutions to Betweenness and Quartet Compatibility", journal = j-TCBB, volume = "9", number = "1", pages = "273--285", month = jan, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.108", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Dec 15 08:25:50 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Daniels:2012:TPS, author = "Noah Daniels and Anoop Kumar and Lenore Cowen and Matt Menke", title = "Touring Protein Space with {Matt}", journal = j-TCBB, volume = "9", number = "1", pages = "286--293", month = jan, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.70", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Dec 15 08:25:50 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Xiang:2012:TDT, author = "Yang Xiang and Philip R. O. 
Payne and Kun Huang", title = "Transactional Database Transformation and Its Application in Prioritizing Human Disease Genes", journal = j-TCBB, volume = "9", number = "1", pages = "294--304", month = jan, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.58", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Dec 15 08:25:50 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ambert:2012:IGS, author = "Kyle H. Ambert and Aaron M. Cohen", title = "$k$-Information Gain Scaled Nearest Neighbors: a Novel Approach to Classifying Protein-Protein Interaction-Related Documents", journal = j-TCBB, volume = "9", number = "1", pages = "305--310", month = jan, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.32", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Dec 15 08:25:50 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Xu:2012:MLM, author = "Yun Xu and Da Teng and Yiming Lei", title = "{MinePhos}: a Literature Mining System for Protein Phoshphorylation Information Extraction", journal = j-TCBB, volume = "9", number = "1", pages = "311--315", month = jan, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.85", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Dec 15 08:25:50 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on 
Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

%%% NOTE: the author field of the next entry formerly repeated the
%%% byline of the preceding research article (Xu:2012:MLM).  A
%%% reviewers list presumably has no personal authors, so it is
%%% recorded as Anonymous.  The citation tag is intentionally left
%%% unchanged so that existing citations continue to resolve.
@Article{Xu:2012:RL,
  author =       "Anonymous",
  title =        "2011 Reviewers List",
  journal =      j-TCBB,
  volume =       "9",
  number =       "1",
  pages =        "316--318",
  month =        jan,
  year =         "2012",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2012.2",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Dec 15 08:25:50 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

%%% NOTE: as for the reviewers list above, the author field of the
%%% next entry formerly repeated the byline of Xu:2012:MLM.  An annual
%%% index presumably has no personal authors, so it is recorded as
%%% Anonymous; the citation tag is kept for compatibility.
@Article{Xu:2012:AI,
  author =       "Anonymous",
  title =        "2011 Annual Index",
  journal =      j-TCBB,
  volume =       "9",
  number =       "1",
  pages =        "??--??",
  month =        jan,
  year =         "2012",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2012.1",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Dec 15 08:25:50 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Acharya:2012:GCA,
  author =       "Lipi Acharya and Thair Judeh and Zhansheng Duan and
                 Michael Rabbat and Dongxiao Zhu",
  title =        "{GSGS}: a Computational Approach to Reconstruct
                 Signaling Pathway Structures from Gene Sets",
  journal =      j-TCBB,
  volume =       "9",
  number =       "2",
  pages =        "438--450",
  month =        mar,
  year =         "2012",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.143",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Jan 26 16:30:44 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Reconstruction of signaling pathway structures is
                 essential to
decipher complex regulatory relationships in living cells. The existing computational approaches often rely on unrealistic biological assumptions and do not explicitly consider signal transduction mechanisms. Signal transduction events refer to linear cascades of reactions from the cell surface to the nucleus and characterize a signaling pathway. In this paper, we propose a novel approach, Gene Set Gibbs Sampling (GSGS), to reverse engineer signaling pathway structures from gene sets related to the pathways. We hypothesize that signaling pathways are structurally an ensemble of overlapping linear signal transduction events which we encode as Information Flows (IFs).", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Andreotti:2012:ALR, author = "Sandro Andreotti and Gunnar W. Klau and Knut Reinert", title = "{Antilope} --- a {Lagrangian} Relaxation Approach to the de novo Peptide Sequencing Problem", journal = j-TCBB, volume = "9", number = "2", pages = "385--394", month = mar, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.59", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jan 26 16:30:44 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Peptide sequencing from mass spectrometry data is a key step in proteome research. Especially de novo sequencing, the identification of a peptide from its spectrum alone, is still a challenge even for state-of-the-art algorithmic approaches. In this paper, we present antilope, a new fast and flexible approach based on mathematical programming. It builds on the spectrum graph model and works with a variety of scoring schemes. antilope combines Lagrangian relaxation for solving an integer linear programming formulation with an adaptation of Yen's $k$ shortest paths algorithm. 
It shows a significant improvement in running time compared to mixed integer optimization and performs at the same speed like other state-of-the-art tools.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Angadi:2012:SSS, author = "Ulavappa B. Angadi and M. Venkatesulu", title = "Structural {SCOP} Superfamily Level Classification Using Unsupervised Machine Learning", journal = j-TCBB, volume = "9", number = "2", pages = "601--608", month = mar, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.114", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jan 26 16:30:44 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "One of the major research directions in bioinformatics is that of assigning superfamily classification to a given set of proteins. The classification reflects the structural, evolutionary, and functional relatedness. These relationships are embodied in a hierarchical classification, such as the Structural Classification of Protein (SCOP), which is mostly manually curated. Such a classification is essential for the structural and functional analyses of proteins. Yet a large number of proteins remain unclassified. In this study, we have proposed an unsupervised machine learning approach to classify and assign a given set of proteins to SCOP superfamilies. 
In the method, we have constructed a database and similarity matrix using P-values obtained from an all-against-all BLAST run and trained the network with the ART2 unsupervised learning algorithm using the rows of the similarity matrix as input vectors, enabling the trained network to classify the proteins from 0.82 to 0.97 f-measure accuracy.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{DiCamillo:2012:SSB, author = "Barbara {Di Camillo} and Marco Falda and Gianna Toffolo and Claudio Cobelli", title = "{SimBioNeT}: a Simulator of Biological Network Topology", journal = j-TCBB, volume = "9", number = "2", pages = "592--600", month = mar, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.116", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jan 26 16:30:44 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Studying biological networks at topological level is a major issue in computational biology studies and simulation is often used in this context, either to assess reverse engineering algorithms or to investigate how topological properties depend on network parameters. In both contexts, it is desirable for a topology simulator to reproduce the current knowledge on biological networks, to be able to generate a number of networks with the same properties and to be flexible with respect to the possibility to mimic networks of different organisms. 
We propose a biological network topology simulator,
                 SimBioNeT, in which module structures of different type
                 and size are replicated at different level of network
                 organization and interconnected, so to obtain the
                 desired degree distribution, e.g., scale free, and a
                 clustering coefficient constant with the number of
                 nodes in the network, a typical characteristic of
                 biological networks.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Chen:2012:ARN,
  author =       "Zhi-Zhong Chen and Lusheng Wang",
  title =        "Algorithms for Reticulate Networks of Multiple
                 Phylogenetic Trees",
  journal =      j-TCBB,
  volume =       "9",
  number =       "2",
  pages =        "372--384",
  month =        mar,
  year =         "2012",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.137",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Jan 26 16:30:44 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "A reticulate network $N$ of multiple phylogenetic
                 trees may have nodes with two or more parents (called
                 reticulation nodes). There are two ways to define the
                 reticulation number of $N$. One way is to define it as
                 the number of reticulation nodes in $N$; in this case,
                 a reticulate network with the smallest reticulation
                 number is called an optimal type-I reticulate network
                 of the trees. The better way is to define it as the
                 total number of parents of reticulation nodes in $N$
                 minus the number of reticulation nodes in $N$; in this
                 case, a reticulate network with the smallest
                 reticulation number is called an optimal type-II
                 reticulate network of the trees.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Cheng:2012:MDT,
  author =       "Yi-Ming Cheng and Srinivasa Murthy Gopal and Sean M.
Law and Michael Feig", title = "Molecular Dynamics Trajectory Compression with a Coarse-Grained Model", journal = j-TCBB, volume = "9", number = "2", pages = "476--486", month = mar, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.141", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jan 26 16:30:44 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Molecular dynamics trajectories are very data intensive thereby limiting sharing and archival of such data. One possible solution is compression of trajectory data. Here, trajectory compression based on conversion to the coarse-grained model PRIMO is proposed. The compressed data are about one third of the original data and fast decompression is possible with an analytical reconstruction procedure from PRIMO to all-atom representations. This protocol largely preserves structural features and to a more limited extent also energetic features of the original trajectory.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{ElBakry:2012:IDE, author = "Ola ElBakry and M. Omair Ahmad and M. N. S. Swamy", title = "Identification of Differentially Expressed Genes for Time-Course Microarray Data Based on Modified {RM} {ANOVA}", journal = j-TCBB, volume = "9", number = "2", pages = "451--466", month = mar, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.65", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jan 26 16:30:44 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The regulation of gene expression is a dynamic process, hence it is of vital interest to identify and characterize changes in gene expression over time. 
We present here a general statistical method for detecting changes in microarray expression over time within a single biological group and is based on repeated measures (RM) ANOVA. In this method, unlike the classical F-statistic, statistical significance is determined taking into account the time dependency of the microarray data. A correction factor for this RM F-statistic is introduced leading to a higher sensitivity as well as high specificity. We investigate the two approaches that exist in the literature for calculating the p-values using resampling techniques of gene-wise p-values and pooled p-values.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Feng:2012:LSL, author = "Zeny Z. Feng and Xiaojian Yang and Sanjeena Subedi and Paul D. McNicholas", title = "The {LASSO} and Sparse Least Squares Regression Methods for {SNP} Selection in Predicting Quantitative Traits", journal = j-TCBB, volume = "9", number = "2", pages = "629--636", month = mar, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.139", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jan 26 16:30:44 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Recent work concerning quantitative traits of interest has focused on selecting a small subset of single nucleotide polymorphisms (SNPs) from among the SNPs responsible for the phenotypic variation of the trait. When considered as covariates, the large number of variables (SNPs) and their association with those in close proximity pose challenges for variable selection. The features of sparsity and shrinkage of regression coefficients of the least absolute shrinkage and selection operator (LASSO) method appear attractive for SNP selection. 
Sparse partial least squares (SPLS) is also appealing as it combines the features of sparsity in subset selection and dimension reduction to handle correlations among SNPs.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Hopfensitz:2012:MBG, author = "Martin Hopfensitz and Christoph Mussel and Christian Wawra and Markus Maucher and Michael Kuhl and Heiko Neumann and Hans A. Kestler", title = "Multiscale Binarization of Gene Expression Data for Reconstructing {Boolean} Networks", journal = j-TCBB, volume = "9", number = "2", pages = "487--498", month = mar, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.62", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jan 26 16:30:44 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Network inference algorithms can assist life scientists in unraveling gene-regulatory systems on a molecular level. In recent years, great attention has been drawn to the reconstruction of Boolean networks from time series. These need to be binarized, as such networks model genes as binary variables (either ``expressed'' or ``not expressed''). Common binarization methods often cluster measurements or separate them according to statistical or information theoretic characteristics and may require many data points to determine a robust threshold. Yet, time series measurements frequently comprise only a small number of samples. 
To overcome this limitation, we propose a binarization that incorporates measurements at multiple resolutions.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Huang:2012:PEL, author = "Qinghua Huang and Dacheng Tao and Xuelong Li and Alan Liew", title = "Parallelized Evolutionary Learning for Detection of Biclusters in Gene Expression Data", journal = j-TCBB, volume = "9", number = "2", pages = "560--570", month = mar, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.53", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jan 26 16:30:44 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The analysis of gene expression data obtained from microarray experiments is important for discovering the biological process of genes. Biclustering algorithms have been proven to be able to group the genes with similar expression patterns under a number of experimental conditions. In this paper, we propose a new biclustering algorithm based on evolutionary learning. By converting the biclustering problem into a common clustering problem, the algorithm can be applied in a search space constructed by the conditions. To further reduce the size of the search space, we randomly separate the full conditions into a number of condition subsets (subspaces), each of which has a smaller number of conditions.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Karafyllidis:2012:QGC, author = "Ioannis G. 
Karafyllidis", title = "Quantum Gate Circuit Model of Signal Integration in Bacterial Quorum Sensing", journal = j-TCBB, volume = "9", number = "2", pages = "571--579", month = mar, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.104", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jan 26 16:30:44 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Bacteria evolved cell to cell communication processes to gain information about their environment and regulate gene expression. Quorum sensing is such a process in which signaling molecules, called autoinducers, are produced, secreted and detected. In several cases bacteria use more than one autoinducers and integrate the information conveyed by them. It has not yet been explained adequately why bacteria evolved such signal integration circuits and what can learn about their environments using more than one autoinducers since all signaling pathways merge in one. Here quantum information theory, which includes classical information theory as a special case, is used to construct a quantum gate circuit that reproduces recent experimental results.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Kelk:2012:EC, author = "Steven Kelk and Celine Scornavacca and Leo van Iersel", title = "On the Elusiveness of Clusters", journal = j-TCBB, volume = "9", number = "2", pages = "517--534", month = mar, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.128", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jan 26 16:30:44 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Rooted phylogenetic networks are often used to represent conflicting phylogenetic signals. 
Given a set of clusters, a network is said to represent these clusters in the softwired sense if, for each cluster in the input set, at least one tree embedded in the network contains that cluster. Motivated by parsimony we might wish to construct such a network using as few reticulations as possible, or minimizing the level of the network, i.e., the maximum number of reticulations used in any ``tangled'' region of the network. Although these are NP-hard problems, here we prove that, for every fixed $ k \ge 0 $, it is polynomial-time solvable to construct a phylogenetic network with level equal to $k$ representing a cluster set, or to determine that no such network exists.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Kentzoglanakis:2012:SIF,
  author =       "Kyriakos Kentzoglanakis and Matthew Poole",
  title =        "A Swarm Intelligence Framework for Reconstructing Gene Networks: Searching for Biologically Plausible Architectures",
  journal =      j-TCBB,
  volume =       "9",
  number =       "2",
  pages =        "358--371",
  month =        mar,
  year =         "2012",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.87",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Jan 26 16:30:44 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "In this paper, we investigate the problem of reverse engineering the topology of gene regulatory networks from temporal gene expression data. We adopt a computational intelligence approach comprising swarm intelligence techniques, namely particle swarm optimization (PSO) and ant colony optimization (ACO). In addition, the recurrent neural network (RNN) formalism is employed for modeling the dynamical behavior of gene regulatory systems.
More specifically, ACO is used for searching the discrete space of network architectures and PSO for searching the corresponding continuous space of RNN model parameters. We propose a novel solution construction process in the context of ACO for generating biologically plausible candidate architectures.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Kulekci:2012:EMR,
  author =       "M. Oguzhan Kulekci and Jeffrey Scott Vitter and Bojian Xu",
  title =        "Efficient Maximal Repeat Finding Using the {Burrows-Wheeler} Transform and Wavelet Tree",
  journal =      j-TCBB,
  volume =       "9",
  number =       "2",
  pages =        "421--429",
  month =        mar,
  year =         "2012",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.127",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Jan 26 16:30:44 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Finding repetitive structures in genomes and proteins is important to understand their biological functions. Many data compressors for modern genomic sequences rely heavily on finding repeats in the sequences. Small-scale and local repetitive structures are better understood than large and complex interspersed ones. The notion of maximal repeats captures all the repeats in the data in a space-efficient way. Prior work on maximal repeat finding used either a suffix tree or a suffix array along with other auxiliary data structures.
Their space usage is 19--50 times the text size with the best engineering efforts, prohibiting their usability on massive data such as the whole human genome.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Ma:2012:MRD,
  author =       "Wenji Ma and Yong Yang and Zhi-Zhong Chen and Lusheng Wang",
  title =        "Mutation Region Detection for Closely Related Individuals without a Known Pedigree",
  journal =      j-TCBB,
  volume =       "9",
  number =       "2",
  pages =        "499--510",
  month =        mar,
  year =         "2012",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.134",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Jan 26 16:30:44 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Linkage analysis serves as a way of finding locations of genes that cause genetic diseases. Linkage studies have facilitated the identification of several hundreds of human genes that can harbor mutations which by themselves lead to a disease phenotype. The fundamental problem in linkage analysis is to identify regions whose allele is shared by all or almost all affected members but by none or few unaffected members. Almost all the existing methods for linkage analysis are for families with clearly given pedigrees.
Little work has been done for the case where the sampled individuals are closely related, but their pedigree is not known.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Mimaroglu:2012:DDC,
  author =       "Selim Mimaroglu and Emin Aksehirli",
  title =        "{DICLENS}: Divisive Clustering Ensemble with Automatic Cluster Number",
  journal =      j-TCBB,
  volume =       "9",
  number =       "2",
  pages =        "408--420",
  month =        mar,
  year =         "2012",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.129",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Jan 26 16:30:44 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Clustering has a long and rich history in a variety of scientific fields. Finding natural groupings of a data set is a hard task as attested by hundreds of clustering algorithms in the literature. Each clustering technique makes some assumptions about the underlying data set. If the assumptions hold, good clusterings can be expected. It is hard, in some cases impossible, to satisfy all the assumptions. Therefore, it is beneficial to apply different clustering methods on the same data set, or the same method with varying input parameters or both.
We propose a novel method, DICLENS, which combines a set of clusterings into a final clustering having better overall quality.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Nanni:2012:IBV,
  author =       "Loris Nanni and Alessandra Lumini and Dinesh Gupta and Aarti Garg",
  title =        "Identifying Bacterial Virulent Proteins by Fusing a Set of Classifiers Based on Variants of {Chou}'s Pseudo Amino Acid Composition and on Evolutionary Information",
  journal =      j-TCBB,
  volume =       "9",
  number =       "2",
  pages =        "467--475",
  month =        mar,
  year =         "2012",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.117",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Jan 26 16:30:44 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "The availability of a reliable prediction method for prediction of bacterial virulent proteins has several important applications in research efforts targeted aimed at finding novel drug targets, vaccine candidates, and understanding virulence mechanisms in pathogens. In this work, we have studied several feature extraction approaches for representing proteins and propose a novel bacterial virulent protein prediction method, based on an ensemble of classifiers where the features are extracted directly from the amino acid sequence and from the evolutionary information of a given protein.
We have evaluated and compared several ensembles obtained by combining six feature extraction methods and several classification approaches based on two general purpose classifiers (i.e., Support Vector Machine and a variant of input decimated ensemble) and their random subspace version.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Phipps:2012:OPN,
  author =       "Paul Phipps and Sergey Bereg",
  title =        "Optimizing Phylogenetic Networks for Circular Split Systems",
  journal =      j-TCBB,
  volume =       "9",
  number =       "2",
  pages =        "535--547",
  month =        mar,
  year =         "2012",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.109",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Jan 26 16:30:44 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "We address the problem of realizing a given distance matrix by a planar phylogenetic network with a minimum number of faces. With the help of the popular software SplitsTree4, we start by approximating the distance matrix with a distance metric that is a linear combination of circular splits. The main results of this paper are the necessary and sufficient conditions for the existence of a network with a single face.
We show how such a network can be constructed, and we present a heuristic for constructing a network with few faces using the first algorithm as the base case.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Poleksic:2012:CPS,
  author =       "Aleksandar Poleksic",
  title =        "On Complexity of Protein Structure Alignment Problem under Distance Constraint",
  journal =      j-TCBB,
  volume =       "9",
  number =       "2",
  pages =        "511--516",
  month =        mar,
  year =         "2012",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.133",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Jan 26 16:30:44 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "We study the well-known Largest Common Point-set (LCP) under Bottleneck Distance Problem. Given two proteins $a$ and $b$ (as sequences of points in three-dimensional space) and a distance cutoff $ \sigma $, the goal is to find a spatial superposition and an alignment that maximizes the number of pairs of points from $a$ and $b$ that can be fit under the distance $ \sigma $ from each other. The best to date algorithms for approximate and exact solution to this problem run in time $ O(n^8) $ and $ O(n^{32}) $, respectively, where $n$ represents protein length.
This work improves runtime of the approximation algorithm and the expected runtime of the algorithm for absolute optimum for both order-dependent and order-independent alignments.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Spillner:2012:CDR,
  author =       "Andreas Spillner and Binh Nguyen and Vincent Moulton",
  title =        "Constructing and Drawing Regular Planar Split Networks",
  journal =      j-TCBB,
  volume =       "9",
  number =       "2",
  pages =        "395--407",
  month =        mar,
  year =         "2012",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.115",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Jan 26 16:30:44 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Split networks are commonly used to visualize collections of bipartitions, also called splits, of a finite set. Such collections arise, for example, in evolutionary studies. Split networks can be viewed as a generalization of phylogenetic trees and may be generated using the SplitsTree package. Recently, the NeighborNet method for generating split networks has become rather popular, in part because it is guaranteed to always generate a circular split system, which can always be displayed by a planar split network. Even so, labels must be placed on the ``outside'' of the network, which might be problematic in some applications.
To help circumvent this problem, it can be helpful to consider so-called flat split systems, which can be displayed by planar split networks where labels are allowed on the inside of the network too.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Steinbiss:2012:NED,
  author =       "Sascha Steinbiss and Stefan Kurtz",
  title =        "A New Efficient Data Structure for Storage and Retrieval of Multiple Biosequences",
  journal =      j-TCBB,
  volume =       "9",
  number =       "2",
  pages =        "345--357",
  month =        mar,
  year =         "2012",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.146",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Jan 26 16:30:44 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Today's genome analysis applications require sequence representations allowing for fast access to their contents while also being memory-efficient enough to facilitate analyses of large-scale data. While a wide variety of sequence representations exist, lack of a generic implementation of efficient sequence storage has led to a plethora of poorly reusable or programming language-specific implementations. We present a novel, space-efficient data structure (GtEncseq) for storing multiple biological sequences of variable alphabet size, with customizable character transformations, wildcard support, and an assortment of internal representations optimized for different distributions of wildcards and sequence lengths.
For the human genome (3.1 gigabases, including 237 million wildcard characters) our representation requires only $ 2 + 8 \cdot 10^{-6} $ bits per character.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Su:2012:INP,
  author =       "Chien-Hao Su and Tse-Yi Wang and Ming-Tsung Hsu and Francis Cheng-Hsuan Weng and Cheng-Yan Kao and Daryi Wang and Huai-Kuang Tsai",
  title =        "The Impact of Normalization and Phylogenetic Information on Estimating the Distance for Metagenomes",
  journal =      j-TCBB,
  volume =       "9",
  number =       "2",
  pages =        "619--628",
  month =        mar,
  year =         "2012",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.111",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Jan 26 16:30:44 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Metagenomics enables the study of unculturable microorganisms in different environments directly. Discriminating between the compositional differences of metagenomes is an important and challenging problem. Several distance functions have been proposed to estimate the differences based on functional profiles or taxonomic distributions; however, the strengths and limitations of such functions are still unclear. Initially, we analyzed three well-known distance functions and found very little difference between them in the clustering of samples. This motivated us to incorporate suitable normalizations and phylogenetic information into the functions so that we could cluster samples from both real and synthetic data sets.
The results indicate significant improvement in sample clustering over that derived by rank-based normalization with phylogenetic information, regardless of whether the samples are from real or synthetic microbiomes.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Wang:2012:EGS,
  author =       "Daifeng Wang and Mia K. Markey and Claus O. Wilke and Ari Arapostathis",
  title =        "Eigen-Genomic System Dynamic-Pattern Analysis ({ESDA}): Modeling {mRNA} Degradation and Self-Regulation",
  journal =      j-TCBB,
  volume =       "9",
  number =       "2",
  pages =        "430--437",
  month =        mar,
  year =         "2012",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.150",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Jan 26 16:30:44 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "High-throughput methods systematically measure the internal state of the entire cell, but powerful computational tools are needed to infer dynamics from their raw data. Therefore, we have developed a new computational method, Eigen-genomic System Dynamic-pattern Analysis (ESDA), which uses systems theory to infer dynamic parameters from a time series of gene expression measurements. As many genes are measured at a modest number of time points, estimation of the system matrix is underdetermined and traditional approaches for estimating dynamic parameters are ineffective; thus, ESDA uses the principle of dimensionality reduction to overcome the data imbalance.
Since degradation rates are naturally confounded by self-regulation, our model estimates an effective degradation rate that is the difference between self-regulation and degradation.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Wang:2012:NEA,
  author =       "Biing-Feng Wang and Chung-Chin Kuo and Shang-Ju Liu and Chien-Hsin Lin",
  title =        "A New Efficient Algorithm for the Gene-Team Problem on General Sequences",
  journal =      j-TCBB,
  volume =       "9",
  number =       "2",
  pages =        "330--344",
  month =        mar,
  year =         "2012",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.96",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Jan 26 16:30:44 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Identifying conserved gene clusters is an important step toward understanding the evolution of genomes and predicting the functions of genes. A famous model to capture the essential biological features of a conserved gene cluster is called the gene-team model. The problem of finding the gene teams of two general sequences is the focus of this paper. For this problem, He and Goldwasser had an efficient algorithm that requires $ O(m n) $ time using $ O(m + n) $ working space, where $m$ and $n$ are, respectively, the numbers of genes in the two given sequences. In this paper, a new efficient algorithm is presented.
Assume $ m \le n $.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Wang:2012:OSA,
  author =       "Biing-Feng Wang",
  title =        "Output-Sensitive Algorithms for Finding the Nested Common Intervals of Two General Sequences",
  journal =      j-TCBB,
  volume =       "9",
  number =       "2",
  pages =        "548--559",
  month =        mar,
  year =         "2012",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.112",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Jan 26 16:30:44 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "The focus of this paper is the problem of finding all nested common intervals of two general sequences. Depending on the treatment one wants to apply to duplicate genes, Blin et al. introduced three models to define nested common intervals of two sequences: the uniqueness, the free-inclusion, and the bijection models. We consider all the three models. For the uniqueness and the bijection models, we give $ O(n + N_{\rm out}) $-time algorithms, where $ N_{\rm out} $ denotes the size of the output.
For the free-inclusion model, we give an $ O(n^{1 + \varepsilon } + N_{{\rm out}}) $-time algorithm, where $ \varepsilon > 0 $ is an arbitrarily small constant.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Wang:2012:RCM,
  author =       "Shu-Lin Wang and Yi-Hai Zhu and Wei Jia and De-Shuang Huang",
  title =        "Robust Classification Method of Tumor Subtype by Using Correlation Filters",
  journal =      j-TCBB,
  volume =       "9",
  number =       "2",
  pages =        "580--591",
  month =        mar,
  year =         "2012",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.135",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Jan 26 16:30:44 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Tumor classification based on Gene Expression Profiles (GEPs), which is of great benefit to the accurate diagnosis and personalized treatment for different types of tumor, has drawn a great attention in recent years. This paper proposes a novel tumor classification method based on correlation filters to identify the overall pattern of tumor subtype hidden in differentially expressed genes. Concretely, two correlation filters, i.e., Minimum Average Correlation Energy (MACE) and Optimal Tradeoff Synthetic Discriminant Function (OTSDF), are introduced to determine whether a test sample matches the templates synthesized for each subclass.
The experiments on six publicly available data sets indicate that the proposed method is robust to noise, and can more effectively avoid the effects of dimensionality curse.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Yoon:2012:SLP,
  author =       "Yongwook Yoon and Gary Geunbae Lee",
  title =        "Subcellular Localization Prediction through Boosting Association Rules",
  journal =      j-TCBB,
  volume =       "9",
  number =       "2",
  pages =        "609--618",
  month =        mar,
  year =         "2012",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.131",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Jan 26 16:30:44 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Computational methods for predicting protein subcellular localization have used various types of features, including N-terminal sorting signals, amino acid compositions, and text annotations from protein databases. Our approach does not use biological knowledge such as the sorting signals or homologues, but use just protein sequence information. The method divides a protein sequence into short k-mer sequence fragments which can be mapped to word features in document classification. A large number of class association rules are mined from the protein sequence examples that range from the N-terminus to the C-terminus.
Then, a boosting algorithm is applied to those rules to build up a final classifier.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Zeng:2012:HES,
  author =       "Nianyin Zeng and Zidong Wang and Yurong Li and Min Du and Xiaohui Liu",
  title =        "A Hybrid {EKF} and Switching {PSO} Algorithm for Joint State and Parameter Estimation of Lateral Flow Immunoassay Models",
  journal =      j-TCBB,
  volume =       "9",
  number =       "2",
  pages =        "321--329",
  month =        mar,
  year =         "2012",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.140",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Jan 26 16:30:44 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "In this paper, a hybrid extended Kalman filter (EKF) and switching particle swarm optimization (SPSO) algorithm is proposed for jointly estimating both the parameters and states of the lateral flow immunoassay model through available short time-series measurement. Our proposed method generalizes the well-known EKF algorithm by imposing physical constraints on the system states. Note that the state constraints are encountered very often in practice that give rise to considerable difficulties in system analysis and design.
The main purpose of this paper is to handle the dynamic modeling problem with state constraints by combining the extended Kalman filtering and constrained optimization algorithms via the maximization probability method.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Mazza:2012:HPC,
  author =       "Tommaso Mazza",
  title =        "High Performance Computational Systems Biology",
  journal =      j-TCBB,
  volume =       "9",
  number =       "3",
  pages =        "641--642",
  month =        may,
  year =         "2012",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2012.42",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Apr 19 17:58:10 MDT 2012",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Burkitt:2012:CCB,
  author =       "Mark Burkitt and Dawn Walker and Daniella M. Romano and Alireza Fazeli",
  title =        "Constructing Complex {$3$D} Biological Environments from Medical Imaging Using High Performance Computing",
  journal =      j-TCBB,
  volume =       "9",
  number =       "3",
  pages =        "643--654",
  month =        may,
  year =         "2012",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.69",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Apr 19 17:58:10 MDT 2012",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Extracting information about the structure of biological tissue from static image data is a complex task requiring computationally intensive operations.
Here, we present how multicore CPUs and GPUs have been utilized to extract information about the shape, size, and path followed by the mammalian oviduct, called the fallopian tube in humans, from histology images, to create a unique but realistic 3D virtual organ. Histology images were processed to identify the individual cross sections and determine the 3D path that the tube follows through the tissue. This information was then related back to the histology images, linking the 2D cross sections with their corresponding 3D position along the oviduct. A series of linear 2D spline cross sections, which were computationally generated for the length of the oviduct, were bound to the 3D path of the tube using a novel particle system technique that provides smooth resolution of self-intersections. This results in a unique 3D model of the oviduct, which is grounded in reality. The GPU is used for the processor intensive operations of image processing and particle physics based simulations, significantly reducing the time required to generate a complete model.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Dematte:2012:SGP,
  author =       "Lorenzo Dematte",
  title =        "{Smoldyn} on Graphics Processing Units: Massively Parallel {Brownian} Dynamics Simulations",
  journal =      j-TCBB,
  volume =       "9",
  number =       "3",
  pages =        "655--667",
  month =        may,
  year =         "2012",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.106",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Apr 19 17:58:10 MDT 2012",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Space is a very important aspect in the simulation of biochemical systems; recently, the need for simulation algorithms able to cope with space is becoming more and more compelling.
Complex and detailed models of biochemical systems need to deal with the movement of single molecules and particles, taking into consideration localized fluctuations, transportation phenomena, and diffusion. A common drawback of spatial models lies in their complexity: models can become very large, and their simulation could be time consuming, especially if we want to capture the systems behavior in a reliable way using stochastic methods in conjunction with a high spatial resolution. In order to deliver the promise done by systems biology to be able to understand a system as whole, we need to scale up the size of models we are able to simulate, moving from sequential to parallel simulation algorithms. In this paper, we analyze Smoldyn, a widely diffused algorithm for stochastic simulation of chemical reactions with spatial resolution and single molecule detail, and we propose an alternative, innovative implementation that exploits the parallelism of Graphics Processing Units (GPUs). The implementation executes the most computational demanding steps (computation of diffusion, unimolecular, and bimolecular reaction, as well as the most common cases of molecule-surface interaction) on the GPU, computing them in parallel on each molecule of the system.
The implementation offers good speed-ups and real time, high quality graphics output.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Belcastro:2012:REA,
  author =       "Vincenzo Belcastro and Francesco Gregoretti and Velia Siciliano and Michele Santoro and Giovanni D'Angelo and Gennaro Oliva and Diego di Bernardo",
  title =        "Reverse Engineering and Analysis of Genome-Wide Gene Regulatory Networks from Gene Expression Profiles Using High-Performance Computing",
  journal =      j-TCBB,
  volume =       "9",
  number =       "3",
  pages =        "668--678",
  month =        may,
  year =         "2012",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.60",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Apr 19 17:58:10 MDT 2012",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Regulation of gene expression is a carefully regulated phenomenon in the cell. ``Reverse-engineering'' algorithms try to reconstruct the regulatory interactions among genes from genome-scale measurements of gene expression profiles (microarrays). Mammalian cells express tens of thousands of genes; hence, hundreds of gene expression profiles are necessary in order to have acceptable statistical evidence of interactions between genes. As the number of profiles to be analyzed increases, so do computational costs and memory requirements. In this work, we designed and developed a parallel computing algorithm to reverse-engineer genome-scale gene regulatory networks from thousands of gene expression profiles. The algorithm is based on computing pairwise Mutual Information between each gene-pair. We successfully tested it to reverse engineer the Mus Musculus (mouse) gene regulatory network in liver from gene expression profiles collected from a public repository.
A parallel hierarchical clustering algorithm was implemented to discover ``communities'' within the gene network. Network communities are enriched for genes involved in the same biological functions. The inferred network was used to identify two mitochondrial proteins.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Bustamam:2012:FPM,
  author =       "Alhadi Bustamam and Kevin Burrage and Nicholas A. Hamilton",
  title =        "Fast Parallel {Markov} Clustering in Bioinformatics Using Massively Parallel Computing on {GPU} with {CUDA} and {ELLPACK-R} Sparse Format",
  journal =      j-TCBB,
  volume =       "9",
  number =       "3",
  pages =        "679--692",
  month =        may,
  year =         "2012",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.68",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Apr 19 17:58:10 MDT 2012",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Markov clustering (MCL) is becoming a key algorithm within bioinformatics for determining clusters in networks. However, with increasing vast amount of data on biological networks, performance and scalability issues are becoming a critical limiting factor in applications. Meanwhile, GPU computing, which uses CUDA tool for implementing a massively parallel computing environment in the GPU card, is becoming a very powerful, efficient, and low-cost option to achieve substantial performance gains over CPU approaches. The use of on-chip memory on the GPU is efficiently lowering the latency time, thus, circumventing a major issue in other parallel computing environments, such as MPI. We introduce a very fast Markov clustering algorithm using CUDA (CUDA-MCL) to perform parallel sparse matrix-matrix computations and parallel sparse Markov matrix normalizations, which are at the heart of MCL.
We utilized ELLPACK-R sparse format to allow the effective and fine-grain massively parallel processing to cope with the sparse nature of interaction networks data sets in bioinformatics applications. As the results show, CUDA-MCL is significantly faster than the original MCL running on CPU. Thus, large-scale parallel computation on off-the-shelf desktop-machines, that were previously only possible on supercomputing architectures, can significantly change the way bioinformaticians and biologists deal with their data.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Barnat:2012:PSP, author = "Jiri Barnat and Lubos Brim and Adam Krejci and Adam Streck and David Safranek and Martin Vejnar and Tomas Vejpustek", title = "On Parameter Synthesis by Parallel Model Checking", journal = j-TCBB, volume = "9", number = "3", pages = "693--705", month = may, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.110", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Apr 19 17:58:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "An important problem in current computational systems biology is to analyze models of biological systems dynamics under parameter uncertainty. This paper presents a novel algorithm for parameter synthesis based on parallel model checking. The algorithm is conceptually universal with respect to the modeling approach employed. We introduce the algorithm, show its scalability, and examine its applicability on several biological models.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Stegmayer:2012:BIV, author = "Georgina Stegmayer and Diego H. 
Milone and Laura Kamenetzky and Mariana G. Lopez and Fernando Carrari", title = "A Biologically Inspired Validity Measure for Comparison of Clustering Methods over Metabolic Data Sets", journal = j-TCBB, volume = "9", number = "3", pages = "706--716", month = may, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.10", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Apr 19 17:58:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In the biological domain, clustering is based on the assumption that genes or metabolites involved in a common biological process are coexpressed/coaccumulated under the control of the same regulatory network. Thus, a detailed inspection of the grouped patterns to verify their memberships to well-known metabolic pathways could be very useful for the evaluation of clusters from a biological perspective. The aim of this work is to propose a novel approach for the comparison of clustering methods over metabolic data sets, including prior biological knowledge about the relation among elements that constitute the clusters. A way of measuring the biological significance of clustering solutions is proposed. This is addressed from the perspective of the usefulness of the clusters to identify those patterns that change in coordination and belong to common pathways of metabolic regulation. The measure summarizes in a compact way the objective analysis of clustering methods, which respects coherence and clusters distribution. It also evaluates the biological internal connections of such clusters considering common pathways. 
The proposed measure was tested in two biological databases using three clustering methods.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Pizzuti:2012:CAM, author = "Clara Pizzuti and Simona E. Rombo", title = "A Coclustering Approach for Mining Large Protein-Protein Interaction Networks", journal = j-TCBB, volume = "9", number = "3", pages = "717--730", month = may, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.158", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Apr 19 17:58:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Several approaches have been presented in the literature to cluster Protein-Protein Interaction (PPI) networks. They can be grouped in two main categories: those allowing a protein to participate in different clusters and those generating only nonoverlapping clusters. In both cases, a challenging task is to find a suitable compromise between the biological relevance of the results and a comprehensive coverage of the analyzed networks. Indeed, methods returning high accurate results are often able to cover only small parts of the input PPI network, especially when low-characterized networks are considered. We present a coclustering-based technique able to generate both overlapping and nonoverlapping clusters. The density of the clusters to search for can also be set by the user. We tested our method on the two networks of yeast and human, and compared it to other five well-known techniques on the same interaction data sets. The results showed that, for all the examples considered, our approach always reaches a good compromise between accuracy and network coverage. 
Furthermore, the behavior of our algorithm is not influenced by the structure of the input network, different from all the techniques considered in the comparison, which returned very good results on the yeast network, while on the human network their outcomes are rather poor.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Kountouris:2012:CSF, author = "Petros Kountouris and Michalis Agathocleous and Vasilis J. Promponas and Georgia Christodoulou and Simos Hadjicostas and Vassilis Vassiliades and Chris Christodoulou", title = "A Comparative Study on Filtering Protein Secondary Structure Prediction", journal = j-TCBB, volume = "9", number = "3", pages = "731--739", month = may, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.22", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Apr 19 17:58:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Filtering of Protein Secondary Structure Prediction (PSSP) aims to provide physicochemically realistic results, while it usually improves the predictive performance. 
We performed a comparative study on this challenging problem, utilizing both machine learning techniques and empirical rules and we found that combinations of the two lead to the highest improvement.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhang:2012:FIF, author = "Xiao-Fei Zhang and Dao-Qing Dai", title = "A Framework for Incorporating Functional Interrelationships into Protein Function Prediction Algorithms", journal = j-TCBB, volume = "9", number = "3", pages = "740--753", month = may, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.148", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Apr 19 17:58:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The functional annotation of proteins is one of the most important tasks in the post-genomic era. Although many computational approaches have been developed in recent years to predict protein function, most of these traditional algorithms do not take interrelationships among functional terms into account, such as different GO terms usually coannotate with some common proteins. In this study, we propose a new functional similarity measure in the form of Jaccard coefficient to quantify these interrelationships and also develop a framework for incorporating GO term similarity into protein function prediction process. The experimental results of cross-validation on \bioname{S. cerevisiae} and \bioname{Homo sapiens} data sets demonstrate that our method is able to improve the performance of protein function prediction. In addition, we find that small size terms associated with a few of proteins obtain more benefit than the large size ones when considering functional interrelationships.
We also compare our similarity measure with other two widely used measures, and results indicate that when incorporated into function prediction algorithms, our proposed measure is more effective. Experiment results also illustrate that our algorithms outperform two previous competing algorithms, which also take functional interrelationships into account, in prediction accuracy. Finally, we show that our method is robust to annotations in the database which are not complete at present. These results give new insights about the importance of functional interrelationships in protein function prediction.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Sharma:2012:TRF, author = "Alok Sharma and Seiya Imoto and Satoru Miyano", title = "A Top-{$r$} Feature Selection Algorithm for Microarray Gene Expression Data", journal = j-TCBB, volume = "9", number = "3", pages = "754--764", month = may, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.151", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Apr 19 17:58:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Most of the conventional feature selection algorithms have a drawback whereby a weakly ranked gene that could perform well in terms of classification accuracy with an appropriate subset of genes will be left out of the selection. Considering this shortcoming, we propose a feature selection algorithm in gene expression data analysis of sample classifications. The proposed algorithm first divides genes into subsets, the sizes of which are relatively small (roughly of size $h$), then selects informative smaller subsets of genes (of size $ r < h $) from a subset and merges the chosen genes with another gene subset (of size $r$) to update the gene subset.
We repeat this process until all subsets are merged into one informative subset. We illustrate the effectiveness of the proposed algorithm by analyzing three distinct gene expression data sets. Our method shows promising classification accuracy for all the test data sets. We also show the relevance of the selected genes in terms of their biological functions.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Li:2012:CPS, author = "Shuai Cheng Li and Dongbo Bu and Ming Li", title = "Clustering 100,000 Protein Structure Decoys in Minutes", journal = j-TCBB, volume = "9", number = "3", pages = "765--773", month = may, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.142", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Apr 19 17:58:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Ab initio protein structure prediction methods first generate large sets of structural conformations as candidates (called decoys), and then select the most representative decoys through clustering techniques. Classical clustering methods are inefficient due to the pairwise distance calculation, and thus become infeasible when the number of decoys is large. In addition, the existing clustering approaches suffer from the arbitrariness in determining a distance threshold for proteins within a cluster: a small distance threshold leads to many small clusters, while a large distance threshold results in the merging of several independent clusters into one cluster. In this paper, we propose an efficient clustering method through fast estimating cluster centroids and efficient pruning rotation spaces. The number of clusters is automatically detected by information distance criteria. 
A package named ONION, which can be downloaded freely, is implemented accordingly. Experimental results on benchmark data sets suggest that ONION is 14 times faster than existing tools, and ONION obtains better selections for 31 targets, and worse selection for 19 targets compared to SPICKER's selections. On an average PC, ONION can cluster 100,000 decoys in around 12 minutes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Sun:2012:DFF, author = "Yanni Sun and Jeremy Buhler and Cheng Yuan", title = "Designing Filters for Fast-Known {NcRNA} Identification", journal = j-TCBB, volume = "9", number = "3", pages = "774--787", month = may, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.149", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Apr 19 17:58:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Detecting members of known noncoding RNA (ncRNA) families in genomic DNA is an important part of sequence annotation. However, the most widely used tool for modeling ncRNA families, the covariance model (CM), incurs a high-computational cost when used for genome-wide search. This cost can be reduced by using a filter to exclude sequences that are unlikely to contain the ncRNA of interest, applying the CM only where it is likely to match strongly. Despite recent advances, designing an efficient filter that can detect ncRNA instances lacking strong conservation while excluding most irrelevant sequences remains challenging. In this work, we design three types of filters based on multiple secondary structure profiles (SSPs). An SSP augments a regular profile (i.e., a position weight matrix) with secondary structure information but can still be efficiently scanned against long sequences. 
Multi-SSP-based filters combine evidence from multiple SSP matches and can achieve high sensitivity and specificity. Our SSP-based filters are extensively tested in BRAliBase III data set, Rfam 9.0, and a published soil metagenomic data set. In addition, we compare the SSP-based filters with several other ncRNA search tools including Infernal (with profile HMMs as filters), ERPIN, and tRNAscan-SE. Our experiments demonstrate that carefully designed SSP filters can achieve significant speedup over unfiltered CM search while maintaining high sensitivity for various ncRNA families. The designed filters and filter-scanning programs are available at our website: \path=www.cse.msu.edu/~yannisun/ssp/=.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Krejnik:2012:EEA, author = "Milos Krejnik and Jiri Klema", title = "Empirical Evidence of the Applicability of Functional Clustering through Gene Expression Classification", journal = j-TCBB, volume = "9", number = "3", pages = "788--798", month = may, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.23", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Apr 19 17:58:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The availability of a great range of prior biological knowledge about the roles and functions of genes and gene-gene interactions allows us to simplify the analysis of gene expression data to make it more robust, compact, and interpretable. Here, we objectively analyze the applicability of functional clustering for the identification of groups of functionally related genes. The analysis is performed in terms of gene expression classification and uses predictive accuracy as an unbiased performance measure.
Features of biological samples that originally corresponded to genes are replaced by features that correspond to the centroids of the gene clusters and are then used for classifier learning. Using 10 benchmark data sets, we demonstrate that functional clustering significantly outperforms random clustering without biological relevance. We also show that functional clustering performs comparably to gene expression clustering, which groups genes according to the similarity of their expression profiles. Finally, the suitability of functional clustering as a feature extraction technique is evaluated and discussed.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Armano:2012:EII, author = "Giuliano Armano and Filippo Ledda", title = "Exploiting Intrastructure Information for Secondary Structure Prediction with Multifaceted Pipelines", journal = j-TCBB, volume = "9", number = "3", pages = "799--808", month = may, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.159", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Apr 19 17:58:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Predicting the secondary structure of proteins is still a typical step in several bioinformatic tasks, in particular, for tertiary structure prediction. Notwithstanding the impressive results obtained so far, mostly due to the advent of sequence encoding schemes based on multiple alignment, in our view the problem should be studied from a novel perspective, in which understanding how available information sources are dealt with plays a central role. 
After revisiting a well-known secondary structure predictor viewed from this perspective (with the goal of identifying which sources of information have been considered and which have not), we propose a generic software architecture designed to account for all relevant information sources. To demonstrate the validity of the approach, a predictor compliant with the proposed generic architecture has been implemented and compared with several state-of-the-art secondary structure predictors. Experiments have been carried out on standard data sets, and the corresponding results confirm the validity of the approach. The predictor is available at \path=http://iasc.diee.unica.it/ssp2/= through the corresponding web application or as downloadable stand-alone portable unpack-and-run bundle.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Serang:2012:FMS, author = "Oliver Serang and William Stratford Noble", title = "Faster Mass Spectrometry-Based Protein Inference: Junction Trees Are More Efficient than Sampling and Marginalization by Enumeration", journal = j-TCBB, volume = "9", number = "3", pages = "809--817", month = may, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.26", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Apr 19 17:58:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The problem of identifying the proteins in a complex mixture using tandem mass spectrometry can be framed as an inference problem on a graph that connects peptides to proteins. Several existing protein identification methods make use of statistical inference methods for graphical models, including expectation maximization, Markov chain Monte Carlo, and full marginalization coupled with approximation heuristics. 
We show that, for this problem, the majority of the cost of inference usually comes from a few highly connected subgraphs. Furthermore, we evaluate three different statistical inference methods using a common graphical model, and we demonstrate that junction tree inference substantially improves rates of convergence compared to existing methods. The python code used for this paper is available at \path=http://noble.gs.washington.edu/proj/fido=.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Huang:2012:GCU, author = "Hong Huang and Hailiang Feng", title = "Gene Classification Using Parameter-Free Semi-Supervised Manifold Learning", journal = j-TCBB, volume = "9", number = "3", pages = "818--827", month = may, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.152", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Apr 19 17:58:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A new manifold learning method, called parameter-free semi-supervised local Fisher discriminant analysis (pSELF), is proposed to map the gene expression data into a low-dimensional space for tumor classification. Motivated by the fact that semi-supervised and parameter-free are two desirable and promising characteristics for dimension reduction, a new difference-based optimization objective function with unlabeled samples has been designed. The proposed method preserves the global structure of unlabeled samples in addition to separating labeled samples in different classes from each other. The semi-supervised method has an analytic form of the globally optimal solution, which can be computed efficiently by eigen decomposition. 
Experimental results on synthetic data and SRBCT, DLBCL, and Brain Tumor gene expression data sets demonstrate the effectiveness of the proposed method.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Jarvis:2012:MIP, author = "Peter Jarvis and Jeremy Sumner", title = "{Markov} Invariants for Phylogenetic Rate Matrices Derived from Embedded Submodels", journal = j-TCBB, volume = "9", number = "3", pages = "828--836", month = may, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.24", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Apr 19 17:58:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We consider novel phylogenetic models with rate matrices that arise via the embedding of a progenitor model on a small number of character states, into a target model on a larger number of character states. Adapting representation-theoretic results from recent investigations of Markov invariants for the general rate matrix model, we give a prescription for identifying and counting Markov invariants for such ``symmetric embedded'' models, and we provide enumerations of these for the first few cases with a small number of character states. The simplest example is a target model on three states, constructed from a general 2 state model; the ``$ 2 \hookrightarrow 3 $'' embedding. We show that for 2 taxa, there exist two invariants of quadratic degree that can be used to directly infer pairwise distances from observed sequences under this model. 
A simple simulation study verifies their theoretical expected values, and suggests that, given the appropriateness of the model class, they have superior statistical properties than the standard (log) Det invariant (which is of cubic degree for this case).", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Yang:2012:MPD, author = "Cheng-Hong Yang and Yu-Huei Cheng and Cheng-Huei Yang and Li-Yeh Chuang", title = "Mutagenic Primer Design for Mismatch {PCR-RFLP} {SNP} Genotyping Using a Genetic Algorithm", journal = j-TCBB, volume = "9", number = "3", pages = "837--845", month = may, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.25", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Apr 19 17:58:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Polymerase chain reaction-restriction fragment length polymorphism (PCR-RFLP) is useful in small-scale basic research studies of complex genetic diseases that are associated with single nucleotide polymorphism (SNP). Designing a feasible primer pair is an important work before performing PCR-RFLP for SNP genotyping. However, in many cases, restriction enzymes to discriminate the target SNP resulting in the primer design is not applicable. A mutagenic primer is introduced to solve this problem. GA-based Mismatch PCR-RFLP Primers Design (GAMPD) provides a method that uses a genetic algorithm to search for optimal mutagenic primers and available restriction enzymes from REBASE. In order to improve the efficiency of the proposed method, a mutagenic matrix is employed to judge whether a hypothetical mutagenic primer can discriminate the target SNP by digestion with available restriction enzymes. 
The available restriction enzymes for the target SNP are mined by the updated core of SNP-RFLPing. GAMPD has been used to simulate the SNPs in the human SLC6A4 gene under different parameter settings and compared with SNP Cutter for mismatch PCR-RFLP primer design. The in silico simulation of the proposed GAMPD program showed that it designs mismatch PCR-RFLP primers. The GAMPD program is implemented in JAVA and is freely available at \path=http://bio.kuas.edu.tw/gampd/=", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Norkus:2012:AAL, author = "Mindaugas Norkus and Damien Fay and Mary J. Murphy and Frank Barry and Gearoid OLaighin and Liam Kilmartin", title = "On the Application of Active Learning and {Gaussian} Processes in Postcryopreservation Cell Membrane Integrity Experiments", journal = j-TCBB, volume = "9", number = "3", pages = "846--856", month = may, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.155", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Apr 19 17:58:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Biological cell cryopreservation permits storage of specimens for future use. Stem cell cryostorage in particular is fast becoming a broadly spread practice due to their potential for use in regenerative medicine. For the optimal cryopreservation process, ultralow temperatures are needed. However, elevated temperatures are often unavoidable in a typical sample handling cycle which in turn negatively affects the postcryopreservation integrity of cells. In this paper, we present an application of active learning using an underlying Gaussian Process (GP) model in an experimental study on postcryopreservation membrane integrity response to a range of elevated temperature conditions. 
We tailored this technique for the current investigation and developed an algorithm which enabled identification of the sampling locations for the experiments in order to obtain the highest information return about the process from a limited size sample set. We applied this algorithm in the experimental study investigating the effects of severe temperature elevation (ranging from $ - 40 $ to $ 20^{\circ } ${\rm C}) over a short term event (48 hours) on the postcryopreservation membrane integrity of Mesenchymal Stem Cells (MSCs) derived from human bone marrow. The algorithm showed excellent performance by selecting the locations which maximized the reduction of variance of the process response estimate. An approximating GP model developed from this experimental data shows that the elevated temperatures during cryopreservation have an imminent detrimental effect on cell integrity.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhang:2012:PCD, author = "Xiao-Fei Zhang and Dao-Qing Dai and Xiao-Xin Li", title = "Protein Complexes Discovery Based on Protein-Protein Interaction Data via a Regularized Sparse Generative Network Model", journal = j-TCBB, volume = "9", number = "3", pages = "857--870", month = may, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.20", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Apr 19 17:58:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Detecting protein complexes from protein interaction networks is one major task in the postgenome era. Previous developed computational algorithms identifying complexes mainly focus on graph partition or dense region finding. 
Most of these traditional algorithms cannot discover overlapping complexes which really exist in the protein-protein interaction (PPI) networks. Even if some density-based methods have been developed to identify overlapping complexes, they are not able to discover complexes that include peripheral proteins. In this study, motivated by recent successful application of generative network model to describe the generation process of PPI networks and to detect communities from social networks, we develop a regularized sparse generative network model (RSGNM), by adding another process that generates propensities using exponential distribution and incorporating Laplacian regularizer into an existing generative network model, for protein complexes identification. By assuming that the propensities are generated using exponential distribution, the estimators of propensities will be sparse, which not only has good biological interpretation but also helps to control the overlapping rate among detected complexes. And the Laplacian regularizer will lead to the estimators of propensities more smooth on interaction networks. Experimental results on three yeast PPI networks show that RSGNM outperforms six previous competing algorithms in terms of the quality of detected complexes. In addition, RSGNM is able to detect overlapping complexes and complexes including peripheral proteins simultaneously. 
These results give new insights about the importance of generative network models in protein complexes identification.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhang:2012:QDS, author = "Yong Zhang and Peng Li and Garng Huang", title = "Quantifying Dynamic Stability of Genetic Memory Circuits", journal = j-TCBB, volume = "9", number = "3", pages = "871--884", month = may, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.132", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Apr 19 17:58:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Bistability/Multistability has been found in many biological systems including genetic memory circuits. Proper characterization of system stability helps to understand biological functions and has potential applications in fields such as synthetic biology. Existing methods of analyzing bistability are either qualitative or in a static way. Assuming the circuit is in a steady state, the latter can only reveal the susceptibility of the stability to injected DC noises. However, this can be inappropriate and inadequate as dynamics are crucial for many biological networks. In this paper, we quantitatively characterize the dynamic stability of a genetic conditional memory circuit by developing new dynamic noise margin (DNM) concepts and associated algorithms based on system theory. Taking into account the duration of the noisy perturbation, the DNMs are more general cases of their static counterparts. Using our techniques, we analyze the noise immunity of the memory circuit and derive insights on dynamic hold and write operations. 
Considering cell-to-cell variations, our parametric analysis reveals that the dynamic stability of the memory circuit has significantly varying sensitivities to underlying biochemical reactions attributable to differences in structure, time scales, and nonlinear interactions between reactions. With proper extensions, our techniques are broadly applicable to other multistable biological systems.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Czeizler:2012:QAS, author = "Eugen Czeizler and Andrzej Mizera and Elena Czeizler and Ralph-Johan Back and John E. Eriksson and Ion Petre", title = "Quantitative Analysis of the Self-Assembly Strategies of Intermediate Filaments from Tetrameric {Vimentin}", journal = j-TCBB, volume = "9", number = "3", pages = "885--898", month = may, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.154", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Apr 19 17:58:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In vitro assembly of intermediate filaments from tetrameric vimentin consists of a very rapid phase of tetramers laterally associating into unit-length filaments and a slow phase of filament elongation. We focus in this paper on a systematic quantitative investigation of two molecular models for filament assembly, recently proposed in (Kirmse et al. J. Biol. Chem. 282, 52 (2007), 18563--18572), through mathematical modeling, model fitting, and model validation. We analyze the quantitative contribution of each filament elongation strategy: with tetramers, with unit-length filaments, with longer filaments, or combinations thereof.
In each case, we discuss the numerical fitting of the model with respect to one set of data, and its separate validation with respect to a second, different set of data. We introduce a high-resolution model for vimentin filament self-assembly, able to capture the detailed dynamics of filaments of arbitrary length. This provides much more predictive power for the model, in comparison to previous models where only the mean length of all filaments in the solution could be analyzed. We show how kinetic observations on low-resolution models can be extrapolated to the high-resolution model and used for lowering its complexity.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Mooney:2012:GGU, author = "Michael Mooney and Beth Wilmot and The Bipolar Genome Study and Shannon McWeeney", title = "The {GA} and the {GWAS}: Using Genetic Algorithms to Search for Multilocus Associations", journal = j-TCBB, volume = "9", number = "3", pages = "899--910", month = may, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.145", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Apr 19 17:58:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Enormous data collection efforts and improvements in technology have made large genome-wide association studies a promising approach for better understanding the genetics of common diseases. Still, the knowledge gained from these studies may be extended even further by testing the hypothesis that genetic susceptibility is due to the combined effect of multiple variants or interactions between variants. Here, we explore and evaluate the use of a genetic algorithm to discover groups of SNPs (of size 2, 3, or 4) that are jointly associated with bipolar disorder. 
The algorithm is guided by the structure of a gene interaction network, and is able to find groups of SNPs that are strongly associated with the disease, while performing far fewer statistical tests than other methods.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Mazza:2012:RTP, author = "Tommaso Mazza and Paolo Ballarini and Rosita Guido and Davide Prandi", title = "The Relevance of Topology in Parallel Simulation of Biological Networks", journal = j-TCBB, volume = "9", number = "3", pages = "911--923", month = may, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.27", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Apr 19 17:58:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Important achievements in traditional biology have deepened the knowledge about living systems leading to an extensive identification of parts-list of the cell as well as of the interactions among biochemical species responsible for cell's regulation. Such an expanding knowledge also introduces new issues. For example, the increasing comprehension of the interdependencies between pathways (pathways cross-talk) has resulted, on one hand, in the growth of informational complexity, on the other, in a strong lack of information coherence. The overall grand challenge remains unchanged: to be able to assemble the knowledge of every ``piece'' of a system in order to figure out the behavior of the whole (integrative approach). In light of these considerations, high performance computing plays a fundamental role in the context of in-silico biology. 
Stochastic simulation is a renowned analysis tool, which, although widely used, is subject to stringent computational requirements, in particular when dealing with heterogeneous and high dimensional systems. Here, we introduce and discuss a methodology aimed at alleviating the burden of simulating complex biological networks. Such a method, which springs from graph theory, is based on the principle of fragmenting the computational space of a simulation trace and delegating the computation of fragments to a number of parallel processes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Sengupta:2012:WMC, author = "Debarka Sengupta and Ujjwal Maulik and Sanghamitra Bandyopadhyay", title = "Weighted {Markov} Chain Based Aggregation of Biomolecule Orderings", journal = j-TCBB, volume = "9", number = "3", pages = "924--933", month = may, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.28", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Apr 19 17:58:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The scope and effectiveness of Rank Aggregation (RA) have already been established in contemporary bioinformatics research. Rank aggregation helps in meta-analysis of putative results collected from different analytic or experimental sources. For example, we often receive considerably differing ranked lists of genes or microRNAs from various target prediction algorithms or microarray studies. Sometimes combining them all, in some sense, yields more effective ordering of the set of objects. Also, assigning a certain level of confidence to each source of ranking is a natural demand of aggregation. Assignment of weights to the sources of orderings can be performed by experts. 
Several rank aggregation approaches like those based on Markov Chains (MCs), evolutionary algorithms, etc., exist in the literature. Markov chains, in general, are faster than the evolutionary approaches. Unlike the evolutionary computing approaches, Markov chains have not been used for weighted aggregation scenarios. This is because of the absence of a formal framework of Weighted Markov Chain (WMC). In this paper, we propose the use of a modified version of MC4 (one of the Markov chains proposed by Dwork et al., 2001), followed by the weighted analog of local Kemenization for performing rank aggregation, where the sources of rankings can be prioritized by an expert. Effectiveness of the weighted Markov chain approach over the very recently proposed Genetic Algorithm (GA) and Cross-Entropy Monte Carlo (MC) algorithm-based techniques, has been established for gene orderings from microarray analysis and orderings of predicted microRNA targets.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zoppis:2012:MIO, author = "Italo Zoppis and Erica Gianazza and Massimiliano Borsani and Clizia Chinello and Veronica Mainini and Carmen Galbusera and Carlo Ferrarese and Gloria Galimberti and Sandro Sorbi and Barbara Borroni and Fulvio Magni and Marco Antoniotti and Giancarlo Mauri", title = "Mutual Information Optimization for Mass Spectra Data Alignment", journal = j-TCBB, volume = "9", number = "3", pages = "934--939", month = may, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.80", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Apr 19 17:58:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "``Signal'' alignments play critical roles in many clinical settings.
This is the case of mass spectrometry (MS) data, an important component of many types of proteomic analysis. A central problem occurs when one needs to integrate (MS) data produced by different sources, e.g., different equipment and/or laboratories. In these cases, some form of ``data integration'' or ``data fusion'' may be necessary in order to discard some source-specific aspects and improve the ability to perform a classification task such as inferring the ``disease classes'' of patients. The need for new high-performance data alignment methods is therefore particularly important in these contexts. In this paper, we propose an approach based both on an information theory perspective, generally used in a feature construction problem, and the application of a mathematical programming task (i.e., the weighted bipartite matching problem). We present the results of a competitive analysis of our method against other approaches. The analysis was conducted on data from plasma/ethylenediaminetetraacetic acid of ``control'' and Alzheimer patients collected from three different hospitals.
The results point to a significant performance advantage of our method with respect to the competing ones tested.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Fagundes-Lima:2012:CSS, author = "Denise Fagundes-Lima and Gerald Weber", title = "Comment on {``SCS: Signal, Context, and Structure Features for Genome-Wide Human Promoter Recognition''}", journal = j-TCBB, volume = "9", number = "3", pages = "940--941", month = may, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.130", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Apr 19 17:58:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We comment on the flexibility profiles calculated by Zeng et al., and show that these profiles do not represent the local flexibility of the DNA molecule. If one takes into account the physics of elasticity, the averaged flexibility profile shows an additional peak which is missed in the original calculation. We show that it is not possible to calculate the flexibility of a 6-mer using tetranucleotide elastic constants; the shortest sequence is a 7-mer. For 6-mers, dinucleotide or trinucleotide parameters are needed.
We present calculations for dinucleotide flexibility parameters and show that the same additional peak is present for both 7-mers and 6-mers.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Anonymous:2012:IAT, author = "Anonymous", title = "{IEEE\slash ACM Transactions on Computational Biology and Bioinformatics} Seeks New {Editor in Chief} for 2013--2014 Terms", journal = j-TCBB, volume = "9", number = "3", pages = "942--942", month = may, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.43", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Apr 19 17:58:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Chen:2012:GEB, author = "Luonan Chen and Michael K.
Ng", title = "Guest Editorial: Bioinformatics and Computational Systems Biology", journal = j-TCBB, volume = "9", number = "4", pages = "945--946", month = jul, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.76", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 25 16:09:45 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Yuan:2012:SRN, author = "Yinyin Yuan and Christina Curtis and Carlos Caldas and Florian Markowetz", title = "A Sparse Regulatory Network of Copy-Number Driven Gene Expression Reveals Putative Breast Cancer Oncogenes", journal = j-TCBB, volume = "9", number = "4", pages = "947--954", month = jul, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.105", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 25 16:09:45 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Copy number aberrations are recognized to be important in cancer as they may localize to regions harboring oncogenes or tumor suppressors. Such genomic alterations mediate phenotypic changes through their impact on expression. Both cis- and trans-acting alterations are important since they may help to elucidate putative cancer genes. However, amidst numerous passenger genes, trans-effects are less well studied due to the computational difficulty in detecting weak and sparse signals in the data, and yet may influence multiple genes on a global scale. We propose an integrative approach to learn a sparse interaction network of DNA copy-number regions with their downstream transcriptional targets in breast cancer. 
With respect to goodness of fit on both simulated and real data, the performance of sparse network inference is no worse than other state-of-the-art models but with the advantage of simultaneous feature selection and efficiency. The DNA-RNA interaction network helps to distinguish copy-number driven expression alterations from those that are copy-number independent. Further, our approach yields a quantitative copy-number dependency score, which distinguishes cis- versus trans-effects. When applied to a breast cancer data set, numerous expression profiles were impacted by cis-acting copy-number alterations, including several known oncogenes such as GRB7, ERBB2, and LSM1. Several trans-acting alterations were also identified, impacting genes such as ADAM2 and BAGE, which warrant further investigation. Availability: An R package named lol is available from \path=www.markowetzlab.org/software/lol.html=.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Liu:2012:IBS, author = "Li-Zhi Liu and Fang-Xiang Wu and W. J. Zhang", title = "Inference of Biological {S}-System Using the Separable Estimation Method and the Genetic Algorithm", journal = j-TCBB, volume = "9", number = "4", pages = "955--965", month = jul, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.126", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 25 16:09:45 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Reconstruction of a biological system from its experimental time series data is a challenging task in systems biology. The S-system which consists of a group of nonlinear ordinary differential equations (ODEs) is an effective model to characterize molecular biological systems and analyze the system dynamics. 
However, inference of S-systems without the knowledge of system structure is not a trivial task due to its nonlinearity and complexity. In this paper, a pruning separable parameter estimation algorithm (PSPEA) is proposed for inferring S-systems. This novel algorithm combines the separable parameter estimation method (SPEM) and a pruning strategy, which includes adding an $ \ell_1 $ regularization term to the objective function and pruning the solution with a threshold value. Then, this algorithm is combined with the continuous genetic algorithm (CGA) to form a hybrid algorithm that owns the properties of these two combined algorithms. The performance of the pruning strategy in the proposed algorithm is evaluated from two aspects: the parameter estimation error and structure identification accuracy. The results show that the proposed algorithm with the pruning strategy has much lower estimation error and much higher identification accuracy than the existing method.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Kawano:2012:IGP, author = "Shuichi Kawano and Teppei Shimamura and Atsushi Niida and Seiya Imoto and Rui Yamaguchi and Masao Nagasaki and Ryo Yoshida and Cristin Print and Satoru Miyano", title = "Identifying Gene Pathways Associated with Cancer Characteristics via Sparse Statistical Methods", journal = j-TCBB, volume = "9", number = "4", pages = "966--972", month = jul, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.48", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 25 16:09:45 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We propose a statistical method for uncovering gene pathways that characterize cancer heterogeneity. 
To incorporate knowledge of the pathways into the model, we define a set of activities of pathways from microarray gene expression data based on the Sparse Probabilistic Principal Component Analysis (SPPCA). A pathway activity logistic regression model is then formulated for cancer phenotype. To select pathway activities related to binary cancer phenotypes, we use the elastic net for the parameter estimation and derive a model selection criterion for selecting tuning parameters included in the model estimation. Our proposed method can also reverse-engineer gene networks based on the identified multiple pathways that enables us to discover novel gene-gene associations relating with the cancer phenotypes. We illustrate the whole process of the proposed method through the analysis of breast cancer gene expression data.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Kim:2012:SGE, author = "Haseong Kim and Erol Gelenbe", title = "Stochastic Gene Expression Modeling with {Hill} Function for Switch-Like Gene Responses", journal = j-TCBB, volume = "9", number = "4", pages = "973--979", month = jul, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.153", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 25 16:09:45 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Gene expression models play a key role to understand the mechanisms of gene regulation whose aspects are grade and switch-like responses. Though many stochastic approaches attempt to explain the gene expression mechanisms, the Gillespie algorithm which is commonly used to simulate the stochastic models requires additional gene cascade to explain the switch-like behaviors of gene responses. 
In this study, we propose a stochastic gene expression model describing the switch-like behaviors of a gene by employing Hill functions to the conventional Gillespie algorithm. We assume eight processes of gene expression and their biologically appropriate reaction rates are estimated based on published literatures. We observed that the state of the system of the toggled switch model is rarely changed since the Hill function prevents the activation of involved proteins when their concentrations stay below a criterion. In ScbA-ScbR system, which can control the antibiotic metabolite production of microorganisms, our modified Gillespie algorithm successfully describes the switch-like behaviors of gene responses and oscillatory expressions which are consistent with the published experimental study.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Chen:2012:EFT, author = "Xin Chen and Xiaohua Hu and Tze Yee Lim and Xiajiong Shen and E. K. Park and Gail L. Rosen", title = "Exploiting the Functional and Taxonomic Structure of Genomic Data by Probabilistic Topic Modeling", journal = j-TCBB, volume = "9", number = "4", pages = "980--991", month = jul, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.113", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 25 16:09:45 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In this paper, we present a method that enables both homology-based approach and composition-based approach to further study the functional core (i.e., microbial core and gene core, correspondingly). In the proposed method, the identification of major functionality groups is achieved by generative topic modeling, which is able to extract useful information from unlabeled data.
We first show that generative topic model can be used to model the taxon abundance information obtained by homology-based approach and study the microbial core. The model considers each sample as a ``document,'' which has a mixture of functional groups, while each functional group (also known as a ``latent topic'') is a weighted mixture of species. Therefore, estimating the generative topic model for taxon abundance data will uncover the distribution over latent functions (latent topic) in each sample. Second, we show that generative topic model can also be used to study the genome-level composition of ``N-mer'' features (DNA subreads obtained by composition-based approaches). The model considers each genome as a mixture of latent genetic patterns (latent topics), while each functional pattern is a weighted mixture of the ``N-mer'' features, thus the existence of core genomes can be indicated by a set of common N-mer features. After studying the mutual information between latent topics and gene regions, we provide an explanation of the functional roles of uncovered latent genetic patterns. The experimental results demonstrate the effectiveness of proposed method.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Gonzalez:2012:PLB, author = "Alvaro J. Gonzalez and Li Liao and Cathy H.
Wu", title = "Predicting Ligand Binding Residues and Functional Sites Using Multipositional Correlations with Graph Theoretic Clustering and Kernel {CCA}", journal = j-TCBB, volume = "9", number = "4", pages = "992--1001", month = jul, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.136", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 25 16:09:45 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We present a new computational method for predicting ligand binding residues and functional sites in protein sequences. These residues and sites tend to be not only conserved, but also exhibit strong correlation due to the selection pressure during evolution in order to maintain the required structure and/or function. To explore the effect of correlations among multiple positions in the sequences, the method uses graph theoretic clustering and kernel-based canonical correlation analysis (kCCA) to identify binding and functional sites in protein sequences as the residues that exhibit strong correlation between the residues' evolutionary characterization at the sites and the structure-based functional classification of the proteins in the context of a functional family. 
The results of testing the method on two well-curated data sets show that the prediction accuracy as measured by Receiver Operating Characteristic (ROC) scores improves significantly when multipositional correlations are accounted for.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Chen:2012:GEI, author = "Jianer Chen and Alexander Zelikovsky", title = "{Guest Editors}' Introduction to the Special Section on Bioinformatics Research and Applications", journal = j-TCBB, volume = "9", number = "4", pages = "1002--1003", month = jul, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.77", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 25 16:09:45 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Chaudhary:2012:FLS, author = "Ruchi Chaudhary and J. Gordon Burleigh and David Fernandez-Baca", title = "Fast Local Search for Unrooted {Robinson--Foulds} Supertrees", journal = j-TCBB, volume = "9", number = "4", pages = "1004--1013", month = jul, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.47", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 25 16:09:45 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A Robinson--Foulds (RF) supertree for a collection of input trees is a tree containing all the species in the input trees that is at minimum total RF distance to the input trees. Thus, an RF supertree is consistent with the maximum number of splits in the input trees. 
Constructing RF supertrees for rooted and unrooted data is NP-hard. Nevertheless, effective local search heuristics have been developed for the restricted case where the input trees and the supertree are rooted. We describe new heuristics, based on the Edge Contract and Refine (ECR) operation, that remove this restriction, thereby expanding the utility of RF supertrees. Our experimental results on simulated and empirical data sets show that our unrooted local search algorithms yield better supertrees than those obtained from MRP and rooted RF heuristics in terms of total RF distance to the input trees and, for simulated data, in terms of RF distance to the true tree.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Lin:2012:MPT, author = "Yu Lin and Vaibhav Rajan and Bernard M. E. Moret", title = "A Metric for Phylogenetic Trees Based on Matching", journal = j-TCBB, volume = "9", number = "4", pages = "1014--1022", month = jul, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.157", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 25 16:09:45 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Comparing two or more phylogenetic trees is a fundamental task in computational biology. The simplest outcome of such a comparison is a pairwise measure of similarity, dissimilarity, or distance. A large number of such measures have been proposed, but so far all suffer from problems varying from computational cost to lack of robustness; many can be shown to behave unexpectedly under certain plausible inputs. 
For instance, the widely used Robinson--Foulds distance is poorly distributed and thus affords little discrimination, while also lacking robustness in the face of very small changes---reattaching a single leaf elsewhere in a tree of any size can instantly maximize the distance. In this paper, we introduce a new pairwise distance measure, based on matching, for phylogenetic trees. We prove that our measure induces a metric on the space of trees, show how to compute it in low polynomial time, verify through statistical testing that it is robust, and finally note that it does not exhibit unexpected behavior under the same inputs that cause problems with other measures. We also illustrate its usefulness in clustering trees, demonstrating significant improvements in the quality of hierarchical clustering as compared to the same collections of trees clustered using the Robinson--Foulds distance.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Swenson:2012:KMA, author = "Krister M. Swenson and Eric Chen and Nicholas D. Pattengale and David Sankoff", title = "The Kernel of Maximum Agreement Subtrees", journal = j-TCBB, volume = "9", number = "4", pages = "1023--1031", month = jul, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.11", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 25 16:09:45 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A Maximum Agreement SubTree (MAST) is a largest subtree common to a set of trees and serves as a summary of common substructure in the trees. A single MAST can be misleading, however, since there can be an exponential number of MASTs, and two MASTs for the same tree set do not even necessarily share any leaves.
In this paper, we introduce the notion of the Kernel Agreement SubTree (KAST), which is the summary of the common substructure in all MASTs, and show that it can be calculated in polynomial time (for trees with bounded degree). Suppose the input trees represent competing hypotheses for a particular phylogeny. We explore the utility of the KAST as a method to discern the common structure of confidence, and as a measure of how confident we are in a given tree set. We also show the trend of the KAST, as compared to other consensus methods, on the set of all trees visited during a Bayesian analysis of flatworm genomes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhang:2012:RRN, author = "Xiuwei Zhang and Bernard M. E. Moret", title = "Refining Regulatory Networks through Phylogenetic Transfer of Information", journal = j-TCBB, volume = "9", number = "4", pages = "1032--1045", month = jul, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.62", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 25 16:09:45 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The experimental determination of transcriptional regulatory networks in the laboratory remains difficult and time-consuming, while computational methods to infer these networks provide only modest accuracy. The latter can be attributed partly to the limitations of a single-organism approach. Computational biology has long used comparative and evolutionary approaches to extend the reach and accuracy of its analyses. 
In this paper, we describe ProPhyC, a probabilistic phylogenetic model and associated inference algorithms, designed to improve the inference of regulatory networks for a family of organisms by using known evolutionary relationships among these organisms. ProPhyC can be used with various network evolutionary models and any existing inference method. Extensive experimental results on both biological and synthetic data confirm that our model (through its associated refinement algorithms) yields substantial improvement in the quality of inferred networks over all current methods. We also compare ProPhyC with a transfer learning approach we design. This approach also uses phylogenetic relationships while inferring regulatory networks for a family of organisms. Using similar input information but designed in a very different framework, this transfer learning approach does not perform better than ProPhyC, which indicates that ProPhyC makes good use of the evolutionary information.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Hodgkinson:2012:ADM, author = "Luqman Hodgkinson and Richard M. Karp", title = "Algorithms to Detect Multiprotein Modularity Conserved during Evolution", journal = j-TCBB, volume = "9", number = "4", pages = "1046--1058", month = jul, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.125", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 25 16:09:45 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Detecting essential multiprotein modules that change infrequently during evolution is a challenging algorithmic task that is important for understanding the structure, function, and evolution of the biological cell. 
In this paper, we define a measure of modularity for interactomes and present a linear-time algorithm, Produles, for detecting multiprotein modularity conserved during evolution that improves on the running time of previous algorithms for related problems and offers desirable theoretical guarantees. We present a biologically motivated graph theoretic set of evaluation measures complementary to previous evaluation measures, demonstrate that Produles exhibits good performance by all measures, and describe certain recurrent anomalies in the performance of previous algorithms that are not detected by previous measures. Consideration of the newly defined measures and algorithm performance on these measures leads to useful insights on the nature of interactomics data and the goals of previous and current algorithms. Through randomization experiments, we demonstrate that conserved modularity is a defining characteristic of interactomes. Computational experiments on current experimentally derived interactomes for \bioname{Homo sapiens} and \bioname{Drosophila melanogaster}, combining results across algorithms, show that nearly 10 percent of current interactome proteins participate in multiprotein modules with good evidence in the protein interaction data of being conserved between human and Drosophila.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Jiang:2012:PPF, author = "Jonathan Q. Jiang and Lisa J. 
McQuay", title = "Predicting Protein Function by Multi-Label Correlated Semi-Supervised Learning", journal = j-TCBB, volume = "9", number = "4", pages = "1059--1069", month = jul, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.156", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 25 16:09:45 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Assigning biological functions to uncharacterized proteins is a fundamental problem in the postgenomic era. The increasing availability of large amounts of data on protein-protein interactions (PPIs) has led to the emergence of a considerable number of computational methods for determining protein function in the context of a network. These algorithms, however, treat each functional class in isolation and thereby often suffer from the difficulty of the scarcity of labeled data. In reality, different functional classes are naturally dependent on one another. We propose a new algorithm, Multi-label Correlated Semi-supervised Learning (MCSL), to incorporate the intrinsic correlations among functional classes into protein function prediction by leveraging the relationships provided by the PPI network and the functional class network. The guiding intuition is that the classification function should be sufficiently smooth on subgraphs where the respective topologies of these two networks are a good match. We encode this intuition as regularized learning with intraclass and interclass consistency, which can be understood as an extension of the graph-based learning with local and global consistency (LGC) method. Cross validation on the yeast proteome illustrates that MCSL consistently outperforms several state-of-the-art methods. Most notably, it effectively overcomes the problem associated with scarcity of label data. 
The supplementary files are freely available at \path=http://sites.google.com/site/csaijiang/MCSL=.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wang:2012:IEP, author = "Jianxin Wang and Min Li and Huan Wang and Yi Pan", title = "Identification of Essential Proteins Based on Edge Clustering Coefficient", journal = j-TCBB, volume = "9", number = "4", pages = "1070--1080", month = jul, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.147", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 25 16:09:45 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Identification of essential proteins is key to understanding the minimal requirements for cellular life and important for drug design. The rapid increase of available protein-protein interaction (PPI) data has made it possible to detect protein essentiality on network level. A series of centrality measures have been proposed to discover essential proteins based on network topology. However, most of them tended to focus only on the location of single protein, but ignored the relevance between interactions and protein essentiality. In this paper, a new centrality measure for identifying essential proteins based on edge clustering coefficient, named as NC, is proposed. Different from previous centrality measures, NC considers both the centrality of a node and the relationship between it and its neighbors. For each interaction in the network, we calculate its edge clustering coefficient. A node's essentiality is determined by the sum of the edge clustering coefficients of interactions connecting it and its neighbors. The new centrality measure NC takes into account the modular nature of protein essentiality.
NC is applied to three different types of yeast protein-protein interaction networks, which are obtained from the DIP database, the MIPS database and the BioGRID database, respectively. The experimental results on the three different networks show that the number of essential proteins discovered by NC universally exceeds that discovered by the six other centrality measures: DC, BC, CC, SC, EC, and IC. Moreover, the essential proteins discovered by NC show significant cluster effect.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Jang:2012:CMP, author = "Woo-Hyuk Jang and Suk-Hoon Jung and Dong-Soo Han", title = "A Computational Model for Predicting Protein Interactions Based on Multidomain Collaboration", journal = j-TCBB, volume = "9", number = "4", pages = "1081--1090", month = jul, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.55", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 25 16:09:45 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Recently, several domain-based computational models for predicting protein-protein interactions (PPIs) have been proposed. The conventional methods usually infer domain or domain combination (DC) interactions from already known interacting sets of proteins, and then predict PPIs using the information. However, the majority of these models often have limitations in providing detailed information on which domain pair (single domain interaction) or DC pair (multidomain interaction) will actually interact for the predicted protein interaction. Therefore, a more comprehensive and concrete computational model for the prediction of PPIs is needed. 
We developed a computational model to predict PPIs using the information of intraprotein domain cohesion and interprotein DC coupling interaction. A method of identifying the primary interacting DC pair was also incorporated into the model in order to infer actual participants in a predicted interaction. Our method made an apparent improvement in the PPI prediction accuracy, and the primary interacting DC pair identification was valid specifically in predicting multidomain protein interactions. In this paper, we demonstrate that (1) the intraprotein domain cohesion is meaningful in improving the accuracy of domain-based PPI prediction, (2) a prediction model incorporating the intradomain cohesion enables us to identify the primary interacting DC pair, and (3) a hybrid approach using the intra/interdomain interaction information can lead to a more accurate prediction.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Choi:2012:HAS, author = "Ickwon Choi and Michael W. Kattan and Brian J. Wells and Changhong Yu", title = "A Hybrid Approach to Survival Model Building Using Integration of Clinical and Molecular Information in Censored Data", journal = j-TCBB, volume = "9", number = "4", pages = "1091--1105", month = jul, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.31", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 25 16:09:45 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In medical society, the prognostic models, which use clinicopathologic features and predict prognosis after a certain treatment, have been externally validated and used in practice. In recent years, most research has focused on high dimensional genomic data and small sample sizes. 
Since clinically similar but molecularly heterogeneous tumors may produce different clinical outcomes, the combination of clinical and genomic information, which may be complementary, is crucial to improve the quality of prognostic predictions. However, there is a lack of an integrating scheme for clinic-genomic models due to the $ {\rm P} \gg {\rm N} $ problem, in particular, for a parsimonious model. We propose a methodology to build a reduced yet accurate integrative model using a hybrid approach based on the Cox regression model, which uses several dimension reduction techniques, $ {\rm L}_2 $ penalized maximum likelihood estimation (PMLE), and resampling methods to tackle the problem. The predictive accuracy of the modeling approach is assessed by several metrics via an independent and thorough scheme to compare competing methods. In breast cancer data studies on a metastasis and death event, we show that the proposed methodology can improve prediction accuracy and build a final model with a hybrid signature that is parsimonious when integrating both types of variables.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Lazar:2012:SFT, author = "Cosmin Lazar and Jonatan Taminau and Stijn Meganck and David Steenhoff and Alain Coletta and Colin Molter and Virginie de Schaetzen and Robin Duque and Hugues Bersini and Ann Now{\'e}", title = "A Survey on Filter Techniques for Feature Selection in Gene Expression Microarray Analysis", journal = j-TCBB, volume = "9", number = "4", pages = "1106--1119", month = jul, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.33", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 25 16:09:45 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A plenitude of feature selection (FS)
methods is available in the literature, most of them rising as a need to analyze data of very high dimension, usually hundreds or thousands of variables. Such data sets are now available in various application areas like combinatorial chemistry, text mining, multivariate imaging, or bioinformatics. As a general accepted rule, these methods are grouped in filters, wrappers, and embedded methods. More recently, a new group of methods has been added in the general framework of FS: ensemble techniques. The focus in this survey is on filter feature selection methods for informative feature discovery in gene expression microarray (GEM) analysis, which is also known as differentially expressed genes (DEGs) discovery, gene prioritization, or biomarker discovery. We present them in a unified framework, using standardized notations in order to reveal their technical details and to highlight their common characteristics as well as their particularities.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Felicioli:2012:BEA, author = "Claudio Felicioli and Roberto Marangoni", title = "{BpMatch}: An Efficient Algorithm for a Segmental Analysis of Genomic Sequences", journal = j-TCBB, volume = "9", number = "4", pages = "1120--1127", month = jul, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.30", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 25 16:09:45 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Here, we propose BpMatch: an algorithm that, working on a suitably modified suffix-tree data structure, is able to compute, in a fast and efficient way, the coverage of a source sequence $S$ on a target sequence $T$, by taking into account direct and reverse segments, eventually overlapped. 
Using BpMatch, the operator should define a priori, the minimum length $l$ of a segment and the minimum number of occurrences minRep, so that only segments longer than $l$ and having a number of occurrences greater than minRep are considered to be significant. BpMatch outputs the significant segments found and the computed segment-based distance. On the worst case, assuming the alphabet dimension $d$ is a constant, the time required by BpMatch to calculate the coverage is $ O(l^2 n) $. On the average, by setting $ l \ge 2 \log_d(n) $, the time required to calculate the coverage is only $ O(n) $. BpMatch, thanks to the minRep parameter, can also be used to perform a self-covering: to cover a sequence using segments coming from itself, by avoiding the trivial solution of having a single segment coincident with the whole sequence. The result of the self-covering approach is a spectral representation of the repeats contained in the sequence. BpMatch is freely available on: \path=www.sourceforge.net/projects/bpmatch/=.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Willson:2012:CHB, author = "Stephen Willson", title = "{CSD} Homomorphisms between Phylogenetic Networks", journal = j-TCBB, volume = "9", number = "4", pages = "1128--1138", month = jul, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.52", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 25 16:09:45 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Since Darwin, species trees have been used as a simplified description of the relationships which summarize the complicated network $N$ of reality. Recent evidence of hybridization and lateral gene transfer, however, suggest that there are situations where trees are inadequate.
Consequently it is important to determine properties that characterize networks closely related to $N$ and possibly more complicated than trees but lacking the full complexity of $N$. A connected surjective digraph map (CSD) is a map $f$ from one network $N$ to another network $M$ such that every arc is either collapsed to a single vertex or is taken to an arc, such that $f$ is surjective, and such that the inverse image of a vertex is always connected. CSD maps are shown to behave well under composition. It is proved that if there is a CSD map from $N$ to $M$, then there is a way to lift an undirected version of $M$ into $N$, often with added resolution. A CSD map from $N$ to $M$ puts strong constraints on $N$. In general, it may be useful to study classes of networks such that, for any $N$, there exists a CSD map from $N$ to some standard member of that class.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Sengupta:2012:NDP, author = "Soumi Sengupta and Sanghamitra Bandyopadhyay", title = "De Novo Design of Potential {RecA} Inhibitors Using {MultiObjective} Optimization", journal = j-TCBB, volume = "9", number = "4", pages = "1139--1154", month = jul, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.35", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 25 16:09:45 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "De novo ligand design involves optimization of several ligand properties such as binding affinity, ligand volume, drug likeness, etc. Therefore, optimization of these properties independently and simultaneously seems appropriate. In this paper, the ligand design problem is modeled in a multiobjective using Archived MultiObjective Simulated Annealing (AMOSA) as the underlying search algorithm. 
The multiple objectives considered are the energy components similarity to a known inhibitor and a novel drug likeliness measure based on Lipinski's rule of five. RecA protein of \bioname{Mycobacterium tuberculosis}, causative agent of tuberculosis, is taken as the target for the drug design. To gauge the goodness of the results, they are compared to the outputs of LigBuilder, NEWLEAD, and Variable genetic algorithm (VGA). The same problem has also been modeled using a well-established genetic algorithm-based multiobjective optimization technique, Nondominated Sorting Genetic Algorithm-II (NSGA-II), to find the efficacy of AMOSA through comparative analysis. Results demonstrate that while some small molecules designed by the proposed approach are remarkably similar to the known inhibitors of RecA, some new ones are discovered that may be potential candidates for novel lead molecules against tuberculosis.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Chen:2012:DOR, author = "Peng Chen and Limsoon Wong and Jinyan Li", title = "Detection of Outlier Residues for Improving Interface Prediction in Protein Heterocomplexes", journal = j-TCBB, volume = "9", number = "4", pages = "1155--1165", month = jul, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.58", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 25 16:09:45 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Sequence-based understanding and identification of protein binding interfaces is a challenging research topic due to the complexity in protein systems and the imbalanced distribution between interface and noninterface residues. This paper presents an outlier detection idea to address the redundancy problem in protein interaction data.
The cleaned training data are then used for improving the prediction performance. We use three novel measures to describe the extent a residue is considered as an outlier in comparison to the other residues: the distance of a residue instance from the center instance of all residue instances of the same class label (Dist), the probability of the class label of the residue instance (PCL), and the importance of within-class and between-class (IWB) residue instances. Outlier scores are computed by integrating the three factors; instances with a sufficiently large score are treated as outliers and removed. The data sets without outliers are taken as input for a support vector machine (SVM) ensemble. The proposed SVM ensemble trained on input data without outliers performs better than that with outliers. Our method is also more accurate than many literature methods on benchmark data sets. From our empirical studies, we found that some outlier interface residues are truly near to noninterface regions, and some outlier noninterface residues are close to interface regions.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Mirceva:2012:EAR, author = "Georgina Mirceva and Ivana Cingovska and Zoran Dimov and Danco Davcev", title = "Efficient Approaches for Retrieving Protein Tertiary Structures", journal = j-TCBB, volume = "9", number = "4", pages = "1166--1179", month = jul, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.138", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 25 16:09:45 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The 3D conformation of a protein in the space is the main factor which determines its function in living organisms. 
Due to the huge amount of newly discovered proteins, there is a need for fast and accurate computational methods for retrieving protein structures. Their purpose is to speed up the process of understanding the structure-to-function relationship which is crucial in the development of new drugs. There are many algorithms addressing the problem of protein structure retrieval. In this paper, we present several novel approaches for retrieving protein tertiary structures. We present our voxel-based descriptor. Then we present our protein ray-based descriptors which are applied on the interpolated protein backbone. We introduce five novel wavelet descriptors which perform wavelet transforms on the protein distance matrix. We also propose an efficient algorithm for distance matrix alignment named Matrix Alignment by Sequence Alignment within Sliding Window (MASASW), which has shown as much faster than DALI, CE, and MatAlign. We compared our approaches between themselves and with several existing algorithms, and they generally prove to be fast and accurate. MASASW achieves the highest accuracy. 
The ray and wavelet-based descriptors as well as MASASW are more accurate than CE.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{DeFrancesco:2012:EGE, author = "Nicoletta {De Francesco} and Giuseppe Lettieri and Luca Martini", title = "Efficient Genotype Elimination via Adaptive Allele Consolidation", journal = j-TCBB, volume = "9", number = "4", pages = "1180--1189", month = jul, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.46", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 25 16:09:45 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We propose the technique of Adaptive Allele Consolidation, that greatly improves the performance of the Lange--Goradia algorithm for genotype elimination in pedigrees, while still producing equivalent output. Genotype elimination consists in removing from a pedigree those genotypes that are impossible according to the Mendelian law of inheritance. This is used to find errors in genetic data and is useful as a preprocessing step in other analyses (such as linkage analysis or haplotype imputation). The problem of genotype elimination is intrinsically combinatorial, and Allele Consolidation is an existing technique where several alleles are replaced by a single ``lumped'' allele in order to reduce the number of combinations of genotypes that have to be considered, possibly at the expense of precision. In existing Allele Consolidation techniques, alleles are lumped once and for all before performing genotype elimination. The idea of Adaptive Allele Consolidation is to dynamically change the set of alleles that are lumped together during the execution of the Lange--Goradia algorithm, so that both high performance and precision are achieved.
We have implemented the technique in a tool called Celer and evaluated it on a large set of scenarios, with good results.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhang:2012:HSP, author = "Yijia Zhang and Hongfei Lin and Zhihao Yang and Jian Wang and Yanpeng Li", title = "Hash Subgraph Pairwise Kernel for Protein-Protein Interaction Extraction", journal = j-TCBB, volume = "9", number = "4", pages = "1190--1202", month = jul, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.50", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 25 16:09:45 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Extracting protein-protein interaction (PPI) from biomedical literature is an important task in biomedical text mining (BioTM). In this paper, we propose a hash subgraph pairwise (HSP) kernel-based approach for this task. The key to the novel kernel is to use the hierarchical hash labels to express the structural information of subgraphs in a linear time. We apply the graph kernel to compute dependency graphs representing the sentence structure for protein-protein interaction extraction task, which can efficiently make use of full graph structural information, and particularly capture the contiguous topological and label information ignored before. We evaluate the proposed approach on five publicly available PPI corpora. 
The experimental results show that our approach significantly outperforms all-path kernel approach on all five corpora and achieves state-of-the-art performance.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Noor:2012:IGR, author = "Amina Noor and Erchin Serpedin and Mohamed Nounou and Hazem Nounou", title = "Inferring Gene Regulatory Networks via Nonlinear State-Space Models and Exploiting Sparsity", journal = j-TCBB, volume = "9", number = "4", pages = "1203--1211", month = jul, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.32", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 25 16:09:45 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This paper considers the problem of learning the structure of gene regulatory networks from gene expression time series data. A more realistic scenario when the state space model representing a gene network evolves nonlinearly is considered while a linear model is assumed for the microarray data. To capture the nonlinearity, a particle filter-based state estimation algorithm is considered instead of the contemporary linear approximation-based approaches. The parameters characterizing the regulatory relations among various genes are estimated online using a Kalman filter. Since a particular gene interacts with a few other genes only, the parameter vector is expected to be sparse. The state estimates delivered by the particle filter and the observed microarray data are then subjected to a LASSO-based least squares regression operation which yields a parsimonious and efficient description of the regulatory network by setting the irrelevant coefficients to zero. 
The performance of the aforementioned algorithm is compared with the extended Kalman filter (EKF) and Unscented Kalman Filter (UKF) employing the Mean Square Error (MSE) as the fidelity criterion in recovering the parameters of gene regulatory networks from synthetic data and real biological data. Extensive computer simulations illustrate that the proposed particle filter-based network inference algorithm outperforms EKF and UKF, and therefore, it can serve as a natural framework for modeling gene regulatory networks with nonlinear and sparse structure.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Yang:2012:PRP, author = "Chao Yang and Zengyou He and Can Yang and Weichuan Yu", title = "Peptide Reranking with Protein-Peptide Correspondence and Precursor Peak Intensity Information", journal = j-TCBB, volume = "9", number = "4", pages = "1212--1219", month = jul, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.29", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 25 16:09:45 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Searching tandem mass spectra against a protein database has been a mainstream method for peptide identification. Improving peptide identification results by ranking true Peptide-Spectrum Matches (PSMs) over their false counterparts leads to the development of various reranking algorithms. In peptide reranking, discriminative information is essential to distinguish true PSMs from false PSMs. Generally, most peptide reranking methods obtain discriminative information directly from database search scores or by training machine learning models. Information in the protein database and MS1 spectra (i.e., single stage MS spectra) is ignored. 
In this paper, we propose to use information in the protein database and MS1 spectra to rerank peptide identification results. To quantitatively analyze their effects to peptide reranking results, three peptide reranking methods are proposed: PPMRanker, PPIRanker, and MIRanker. PPMRanker only uses Protein-Peptide Map (PPM) information from the protein database, PPIRanker only uses Precursor Peak Intensity (PPI) information, and MIRanker employs both PPM information and PPI information. According to our experiments on a standard protein mixture data set, a human data set and a mouse data set, PPMRanker and MIRanker achieve better peptide reranking results than PeptideProphet, PeptideProphet+NSP (number of sibling peptides) and a score regularization method SRPI. The source codes of PPMRanker, PPIRanker, and MIRanker, and all supplementary documents are available at our website: \path=http://bioinformatics.ust.hk/pepreranking/=. Alternatively, these documents can also be downloaded from: \path=http://sourceforge.net/projects/pepreranking/=.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Jiang:2012:SFU, author = "Haitao Jiang and Chunfang Zheng and David Sankoff and Binhai Zhu", title = "Scaffold Filling under the Breakpoint and Related Distances", journal = j-TCBB, volume = "9", number = "4", pages = "1220--1229", month = jul, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.57", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 25 16:09:45 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Motivated by the trend of genome sequencing without completing the sequence of the whole genomes, a problem on filling an incomplete multichromosomal genome (or scaffold) $I$ with respect to a complete target genome $G$ was
studied. The objective is to minimize the resulting genomic distance between $ I' $ and $G$, where $ I' $ is the corresponding filled scaffold. We call this problem the one-sided scaffold filling problem. In this paper, we conduct a systematic study for the scaffold filling problem under the breakpoint distance and its variants, for both unichromosomal and multichromosomal genomes (with and without gene repetitions). When the input genome contains no gene repetition (i.e., is a fragment of a permutation), we show that the two-sided scaffold filling problem (i.e., $G$ is also incomplete) is polynomially solvable for unichromosomal genomes under the breakpoint distance and for multichromosomal genomes under the genomic (or DCJ---Double-Cut-and-Join) distance. However, when the input genome contains some repeated genes, even the one-sided scaffold filling problem becomes NP-complete when the similarity measure is the maximum number of adjacencies between two sequences. For this problem, we also present efficient constant-factor approximation algorithms: factor-2 for the general case and factor 1.33 for the one-sided case.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Pal:2012:TDR, author = "Ranadip Pal and Sonal Bhattacharya", title = "Transient Dynamics of Reduced-Order Models of Genetic Regulatory Networks", journal = j-TCBB, volume = "9", number = "4", pages = "1230--1244", month = jul, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.37", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 25 16:09:45 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In systems biology, a number of detailed genetic regulatory networks models have been proposed that are capable of modeling the fine-scale dynamics of gene 
expression. However, limitations on the type and sampling frequency of experimental data often prevent the parameter estimation of the detailed models. Furthermore, the high computational complexity involved in the simulation of a detailed model restricts its use. In such a scenario, reduced-order models capturing the coarse-scale behavior of the network are frequently applied. In this paper, we analyze the dynamics of a reduced-order Markov Chain model approximating a detailed Stochastic Master Equation model. Utilizing a reduction mapping that maintains the aggregated steady-state probability distribution of stochastic master equation models, we provide bounds on the deviation of the Markov Chain transient distribution from the transient aggregated distributions of the stochastic master equation model.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Torres:2012:UGE, author = "Jose Salavert Torres and Ignacio Blanquer Espert and Andres Tomas Dominguez and Vicente Hern{\'a}ndez and Ignacio Medina and Joaquin T{\'a}rraga and Joaquin Dopazo", title = "Using {GPUs} for the Exact Alignment of Short-Read Genetic Sequences by Means of the {Burrows--Wheeler} Transform", journal = j-TCBB, volume = "9", number = "4", pages = "1245--1256", month = jul, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.49", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 25 16:09:45 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "General Purpose Graphic Processing Units (GPGPUs) constitute an inexpensive resource for computing-intensive applications that could exploit an intrinsic fine-grain parallelism. 
This paper presents the design and implementation in GPGPUs of an exact alignment tool for nucleotide sequences based on the Burrows--Wheeler Transform. We compare this algorithm with state-of-the-art implementations of the same algorithm over standard CPUs, and considering the same conditions in terms of I/O. Excluding disk transfers, the implementation of the algorithm in GPUs shows a speedup larger than $ 12 \times $, when compared to CPU execution. This implementation exploits the parallelism by concurrently searching different sequences on the same reference search tree, maximizing memory locality and ensuring a symmetric access to the data. The paper describes the behavior of the algorithm in GPU, showing a good scalability in the performance, only limited by the size of the GPU inner memory.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhang:2012:NUF, author = "Shaohong Zhang and Hau-San Wong and Ying Shen and Dongqing Xie", title = "A New Unsupervised Feature Ranking Method for Gene Expression Data Based on Consensus Affinity", journal = j-TCBB, volume = "9", number = "4", pages = "1257--1263", month = jul, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.34", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 25 16:09:45 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Feature selection is widely established as one of the fundamental computational techniques in mining microarray data. Due to the lack of categorized information in practice, unsupervised feature selection is more practically important but correspondingly more difficult. 
Motivated by the cluster ensemble techniques, which combine multiple clustering solutions into a consensus solution of higher accuracy and stability, recent efforts in unsupervised feature selection proposed to use these consensus solutions as oracles. However, these methods are dependent on both the particular cluster ensemble algorithm used and the knowledge of the true cluster number. These methods will be unsuitable when the true cluster number is not available, which is common in practice. In view of the above problems, a new unsupervised feature ranking method is proposed to evaluate the importance of the features based on consensus affinity. Different from previous works, our method compares the corresponding affinity of each feature between a pair of instances based on the consensus matrix of clustering solutions. As a result, our method alleviates the need to know the true number of clusters and the dependence on particular cluster ensemble approaches as in previous works. Experiments on real gene expression data sets demonstrate significant improvement of the feature ranking results when compared to several state-of-the-art techniques.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Chen:2012:GEA, author = "Yi-Ping Phoebe Chen", title = "Guest Editorial: Application and Development of Bioinformatics", journal = j-TCBB, volume = "9", number = "5", pages = "1265", month = sep, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.96", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Aug 28 17:31:04 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } 
@Article{Handoko:2012:QAA, author = "Stephanus Daniel Handoko and Xuchang Ouyang and Chinh Tran To Su and Chee Keong Kwoh and Yew Soon Ong", title = "{QuickVina}: Accelerating {AutoDock Vina} Using Gradient-Based Heuristics for Global Optimization", journal = j-TCBB, volume = "9", number = "5", pages = "1266--1272", month = sep, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.82", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Aug 28 17:31:04 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Predicting binding between macromolecule and small molecule is a crucial phase in the field of rational drug design. AutoDock Vina, one of the most widely used docking software released in 2009, uses an empirical scoring function to evaluate the binding affinity between the molecules and employs the iterated local search global optimizer for global optimization, achieving a significantly improved speed and better accuracy of the binding mode prediction compared to its predecessor, AutoDock 4. In this paper, we propose further improvement in the local search algorithm of Vina by heuristically preventing some intermediate points from undergoing local search. Our improved version of Vina---dubbed QVina---achieved a maximum acceleration of about 25 times with the average speed-up of 8.34 times compared to the original Vina when tested on a set of 231 protein-ligand complexes while maintaining the optimal scores mostly identical. Using our heuristics, larger number of different ligands can be quickly screened against a given receptor within the same time frame.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Yang:2012:IXT, author = "Pengyi Yang and Jie Ma and Penghao Wang and Yunping Zhu and Bing B. 
Zhou and Yee Hwa Yang", title = "Improving {X!Tandem} on Peptide Identification from Mass Spectrometry by Self-Boosted Percolator", journal = j-TCBB, volume = "9", number = "5", pages = "1273--1280", month = sep, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.86", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Aug 28 17:31:04 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A critical component in mass spectrometry (MS)-based proteomics is an accurate protein identification procedure. Database search algorithms commonly generate a list of peptide-spectrum matches (PSMs). The validity of these PSMs is critical for downstream analysis since proteins that are present in the sample are inferred from those PSMs. A variety of postprocessing algorithms have been proposed to validate and filter PSMs. Among them, the most popular ones include a semi-supervised learning (SSL) approach known as Percolator and an empirical modeling approach known as PeptideProphet. However, they are predominantly designed for commercial database search algorithms, i.e., SEQUEST and MASCOT. Therefore, it is highly desirable to extend and optimize those PSM postprocessing algorithms for open source database search algorithms such as X!Tandem. In this paper, we propose a Self-boosted Percolator for postprocessing X!Tandem search results. We find that the SSL algorithm utilized by Percolator depends heavily on the initial ranking of PSMs. Starting with a poor PSM ranking list may cause Percolator to perform suboptimally. 
By implementing Percolator in a cascade learning manner, we can progressively improve the performance through multiple boost runs, enabling many more PSM identifications without sacrificing false discovery rate (FDR).", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wan:2012:CAD, author = "Lin Wan and Fengzhu Sun", title = "{CEDER}: Accurate Detection of Differentially Expressed Genes by Combining Significance of Exons Using {RNA-Seq}", journal = j-TCBB, volume = "9", number = "5", pages = "1281--1292", month = sep, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.83", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Aug 28 17:31:04 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "RNA-Seq is widely used in transcriptome studies, and the detection of differentially expressed genes (DEGs) between two classes of individuals, e.g., cases versus controls, using RNA-Seq is of fundamental importance. Many statistical methods for DEG detection based on RNA-Seq data have been developed and most of them are based on the read counts mapped to individual genes. On the other hand, genes are composed of exons and the distribution of reads for the different exons can be heterogeneous. We hypothesize that the detection accuracy of differentially expressed genes can be increased by analyzing individual exons within a gene and then combining the results of the exons. We therefore developed a novel program, termed CEDER, to accurately detect DEGs by combining the significance of the exons. CEDER first tests for differentially expressed exons yielding a p-value for each, and then gives a score indicating the potential for a gene to be differentially expressed by integrating the p-values of the exons in the gene. 
We showed that CEDER can significantly increase the accuracy of existing methods for detecting DEGs on two benchmark RNA-Seq data sets and simulated datasets.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Rottger:2012:HLD, author = "Richard Rottger and Ulrich Ruckert and Jan Taubert and Jan Baumbach", title = "How Little Do We Actually Know? {On} the Size of Gene Regulatory Networks", journal = j-TCBB, volume = "9", number = "5", pages = "1293--1300", month = sep, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.71", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Aug 28 17:31:04 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The National Center for Biotechnology Information (NCBI) recently announced the availability of whole genome sequences for more than 1,000 species. And the number of sequenced individual organisms is growing. Ongoing improvement of DNA sequencing technology will further contribute to this, enabling large-scale evolution and population genetics studies. However, the availability of sequence information is only the first step in understanding how cells survive, reproduce, and adjust their behavior. The genetic control behind organized development and adaptation of complex organisms still remains widely undetermined. One major molecular control mechanism is transcriptional gene regulation. The direct juxtaposition of the total number of sequenced species to the handful of model organisms with known regulations is surprising. Here, we investigate how little we even know about these model organisms. We aim to predict the sizes of the whole-organism regulatory networks of seven species. In particular, we provide statistical lower bounds for the expected number of regulations. 
For Escherichia coli we estimate at most 37 percent of the expected gene regulatory interactions to be already discovered, 24 percent for Bacillus subtilis, and $<$ 3\% human, respectively. We conclude that even for our best researched model organisms we still lack substantial understanding of fundamental molecular control mechanisms, at least on a large scale.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ashtawy:2012:CAR, author = "Hossam M. Ashtawy and Nihar R. Mahapatra", title = "A Comparative Assessment of Ranking Accuracies of Conventional and Machine-Learning-Based Scoring Functions for Protein-Ligand Binding Affinity Prediction", journal = j-TCBB, volume = "9", number = "5", pages = "1301--1313", month = sep, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.36", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Aug 28 17:31:04 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Accurately predicting the binding affinities of large sets of protein-ligand complexes efficiently is a key challenge in computational biomolecular science, with applications in drug discovery, chemical biology, and structural biology. Since a scoring function (SF) is used to score, rank, and identify drug leads, the fidelity with which it predicts the affinity of a ligand candidate for a protein's binding site has a significant bearing on the accuracy of virtual screening. Despite intense efforts in developing conventional SFs, which are either force-field based, knowledge-based, or empirical, their limited ranking accuracy has been a major roadblock toward cost-effective drug discovery. 
Therefore, in this work, we explore a range of novel SFs employing different machine-learning (ML) approaches in conjunction with a variety of physicochemical and geometrical features characterizing protein-ligand complexes. We assess the ranking accuracies of these new ML-based SFs as well as those of conventional SFs in the context of the 2007 and 2010 PDBbind benchmark data sets on both diverse and protein-family-specific test sets. We also investigate the influence of the size of the training data set and the type and number of features used on ranking accuracy. Within clusters of protein-ligand complexes with different ligands bound to the same target protein, we find that the best ML-based SF is able to rank the ligands correctly based on their experimentally determined binding affinities 62.5 percent of the time and identify the top binding ligand 78.1 percent of the time. For this SF, the Spearman correlation coefficient between ranks of ligands ordered by predicted and experimentally determined binding affinities is 0.771. 
Given the challenging nature of the ranking problem and that SFs are used to screen millions of ligands, this represents a significant improvement over the best conventional SF we studied, for which the corresponding ranking performance values are 57.8 percent, 73.4 percent, and 0.677.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Fages:2012:GEI, author = "Fran{\c{c}}ois Fages and Sylvain Soliman", title = "{Guest Editors}' Introduction to the Special Section on Computational Methods in Systems Biology", journal = j-TCBB, volume = "9", number = "5", pages = "1314--1315", month = sep, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.97", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Aug 28 17:31:04 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Omony:2012:EDS, author = "Jimmy Omony and Astrid R. Mach-Aigner and Leo H. de Graaff and Gerrit van Straten and Anton J. B. van Boxtel", title = "Evaluation of Design Strategies for Time Course Experiments in Genetic Networks: Case Study of the {XlnR} Regulon in \bioname{Aspergillus niger}", journal = j-TCBB, volume = "9", number = "5", pages = "1316--1325", month = sep, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.59", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Aug 28 17:31:04 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "One of the challenges in genetic network reconstruction is finding experimental designs that maximize the information content in a data set. 
In this paper, the information value of mRNA transcription time course experiments was used to compare experimental designs. The study concerns the dynamic response of genes in the XlnR regulon of Aspergillus niger, with the goal to find the best moment in time to administer an extra pulse of inducing D-xylose. Low and high D-xylose pulses were used to perturb the XlnR regulon. Evaluation of the experimental methods was based on simulation of the regulon. Models that govern the regulation of the target genes in this regulon were used for the simulations. Parameter sensitivity analysis, the Fisher Information Matrix (FIM) and the modified E-criterion were used to assess the design performances. The results show that the best time to give a second D-xylose pulse is when the D-xylose concentration from the first pulse has not yet completely faded away. Due to the presence of a repression effect the strength of the second pulse must be optimized, rather than maximized. The results suggest that the modified E-criterion is a better metric than the sum of integrals of absolute sensitivity for comparing alternative designs.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Czeizler:2012:PHS, author = "Eugen Czeizler and Vladimir Rogojin and Ion Petre", title = "The Phosphorylation of the Heat Shock Factor as a Modulator for the Heat Shock Response", journal = j-TCBB, volume = "9", number = "5", pages = "1326--1337", month = sep, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.66", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Aug 28 17:31:04 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The heat shock response is a well-conserved defence mechanism against the accumulation of misfolded proteins due to prolonged 
elevated heat. The cell responds to heat shock by raising the levels of heat shock proteins (hsp), which are responsible for chaperoning protein refolding. The synthesis of hsp is highly regulated at the transcription level by specific heat shock (transcription) factors (hsf). One of the regulation mechanisms is the phosphorylation of hsf's. Experimental evidence shows a connection between the hyper-phosphorylation of hsfs and the transactivation of the hsp-encoding genes. In this paper, we incorporate several (de)phosphorylation pathways into an existing well-validated computational model of the heat shock response. We analyze the quantitative control of each of these pathways over the entire process. For each of these pathways we create detailed computational models which we subject to parameter estimation in order to fit them to existing experimental data. In particular, we find conclusive evidence supporting only one of the analyzed pathways. Also, we corroborate our results with a set of computational models of a more reduced size.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Klarner:2012:TSD, author = "Hannes Klarner and Heike Siebert and Alexander Bockmayr", title = "Time Series Dependent Analysis of Unparametrized {Thomas} Networks", journal = j-TCBB, volume = "9", number = "5", pages = "1338--1351", month = sep, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.61", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Aug 28 17:31:04 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This paper is concerned with the analysis of labeled Thomas networks using discrete time series. It focuses on refining the given edge labels and on assessing the data quality. 
The results are aimed at being exploitable for experimental design and include the prediction of new activatory or inhibitory effects of given interactions and yet unobserved oscillations of specific components in between specific sampling intervals. On the formal side, we generalize the concept of edge labels and introduce a discrete time series interpretation. This interpretation features two original concepts: (1) Incomplete measurements are admissible, and (2) it allows qualitative assumptions about the changes in gene expression by means of monotonicity. On the computational side, we provide a Python script, {\tt erda.py}, that automates the suggested workflow by model checking and constraint satisfaction. We illustrate the workflow by investigating the yeast network IRMA.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Palaniappan:2012:HFF, author = "Sucheendra K. Palaniappan and S. Akshay and Bing Liu and Blaise Genest and P. S. Thiagarajan", title = "A Hybrid Factored Frontier Algorithm for Dynamic {Bayesian} Networks with a Biopathways Application", journal = j-TCBB, volume = "9", number = "5", pages = "1352--1365", month = sep, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.60", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Aug 28 17:31:04 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Dynamic Bayesian Networks (DBNs) can serve as succinct probabilistic dynamic models of biochemical networks [CHECK END OF SENTENCE]. To analyze these models, one must compute the probability distribution over system states at a given time point. Doing this exactly is infeasible for large models; hence one must use approximate algorithms. 
The Factored Frontier algorithm (FF) is one such algorithm [CHECK END OF SENTENCE]. However FF as well as the earlier Boyen-Koller (BK) algorithm [CHECK END OF SENTENCE] can incur large errors. To address this, we present a new approximate algorithm called the Hybrid Factored Frontier (HFF) algorithm. At each time slice, in addition to maintaining probability distributions over local states---as FF does---HFF explicitly maintains the probabilities of a number of global states called spikes. When the number of spikes is 0, we get FF and with all global states as spikes, we get the exact inference algorithm. We show that by increasing the number of spikes one can reduce errors while the additional computational effort required is only quadratic in the number of spikes. We validated the performance of HFF on large DBN models of biopathways. Each pathway has more than 30 species and the corresponding DBN has more than 3,000 nodes. Comparisons with FF and BK show that HFF is a useful and powerful approximate inferencing algorithm for DBNs.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Paoletti:2012:MCM, author = "Nicola Paoletti and Pietro Lio and Emanuela Merelli and Marco Viceconti", title = "Multilevel Computational Modeling and Quantitative Analysis of Bone Remodeling", journal = j-TCBB, volume = "9", number = "5", pages = "1366--1378", month = sep, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.51", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Aug 28 17:31:04 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Our work focuses on bone remodeling with a multiscale breadth that ranges from modeling intracellular and intercellular RANK/RANKL signaling to tissue dynamics, by developing a multilevel modeling 
framework. Several important findings provide clear evidences of the multiscale properties of bone formation and of the links between RANK/RANKL and bone density in healthy and disease conditions. Recent studies indicate that the circulating levels of OPG and RANKL are inversely related to bone turnover and Bone Mineral Density (BMD) and contribute to the development of osteoporosis in postmenopausal women, and thalassemic patients. We make use of a spatial process algebra, the Shape Calculus, to control stochastic cell agents that are continuously remodeling the bone. We found that our description is effective for such a multiscale, multilevel process and that RANKL signaling small dynamic concentration defects are greatly amplified by the continuous alternation of absorption and formation resulting in large structural bone defects. This work contributes to the computational modeling of complex systems with a multilevel approach connecting formal languages and agent-based simulation tools.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Hanczar:2012:NMC, author = "Blaise Hanczar and Avner Bar-Hen", title = "A New Measure of Classifier Performance for Gene Expression Data", journal = j-TCBB, volume = "9", number = "5", pages = "1379--1386", month = sep, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.21", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Aug 28 17:31:04 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "One of the major aims of many microarray experiments is to build discriminatory diagnosis and prognosis models. A large number of supervised methods have been proposed in literature for microarray-based classification for this purpose. 
Model evaluation and comparison is a critical issue and, the most of the time, is based on the classification cost. This classification cost is based on the costs of false positives and false negative, that are generally unknown in diagnostics problems. This uncertainty may highly impact the evaluation and comparison of the classifiers. We propose a new measure of classifier performance that takes account of the uncertainty of the error. We represent the available knowledge about the costs by a distribution function defined on the ratio of the costs. The performance of a classifier is therefore computed over the set of all possible costs weighted by their probability distribution. Our method is tested on both artificial and real microarray data sets. We show that the performance of classifiers is very depending of the ratio of the classification costs. In many cases, the best classifier can be identified by our new measure whereas the classic error measures fail.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Kamath:2012:EAA, author = "Uday Kamath and Jack Compton and Rezarta Islamaj Dogan and Kenneth De Jong and Amarda Shehu", title = "An Evolutionary Algorithm Approach for Feature Generation from Sequence Data and Its Application to {DNA} Splice Site Prediction", journal = j-TCBB, volume = "9", number = "5", pages = "1387--1398", month = sep, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.53", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Aug 28 17:31:04 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Associating functional information with biological sequences remains a challenge for machine learning methods. 
The performance of these methods often depends on deriving predictive features from the sequences sought to be classified. Feature generation is a difficult problem, as the connection between the sequence features and the sought property is not known a priori. It is often the task of domain experts or exhaustive feature enumeration techniques to generate a few features whose predictive power is then tested in the context of classification. This paper proposes an evolutionary algorithm to effectively explore a large feature space and generate predictive features from sequence data. The effectiveness of the algorithm is demonstrated on an important component of the gene-finding problem, DNA splice site prediction. This application is chosen due to the complexity of the features needed to obtain high classification accuracy and precision. Our results test the effectiveness of the obtained features in the context of classification by Support Vector Machines and show significant improvement in accuracy and precision over state-of-the-art approaches.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ansari:2012:DPS, author = "Nadeem A. Ansari and Riyue Bao and Calin Voichita and Sorin Draghici", title = "Detecting Phenotype-Specific Interactions between Biological Processes from Microarray Data and Annotations", journal = j-TCBB, volume = "9", number = "5", pages = "1399--1409", month = sep, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.65", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Aug 28 17:31:04 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "High throughput technologies enable researchers to measure expression levels on a genomic scale. 
However, the correct and efficient biological interpretation of such voluminous data remains a challenging problem. Many tools have been developed for the analysis of GO terms that are over- or under-represented in a list of differentially expressed genes. However, a previously unexplored aspect is the identification of changes in the way various biological processes interact in a given condition with respect to a reference. Here, we present a novel approach that aims at identifying such interactions between biological processes that are significantly different in a given phenotype with respect to normal. The proposed technique uses vector-space representation, SVD-based dimensionality reduction, differential weighting, and bootstrapping to assess the significance of the interactions under the multiple and complex dependencies expected between the biological processes. We illustrate our approach on two real data sets involving breast and lung cancer. More than 88 percent of the interactions found by our approach were deemed to be correct by an extensive manual review of literature. An interesting subset of such interactions is discussed in detail and shown to have the potential to open new avenues for research in lung and breast cancer.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Akutsu:2012:FPA, author = "Tatsuya Akutsu and Sven Kosub and Avraham A. 
Melkman and Takeyuki Tamura", title = "Finding a Periodic Attractor of a {Boolean} Network", journal = j-TCBB, volume = "9", number = "5", pages = "1410--1421", month = sep, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.87", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Aug 28 17:31:04 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In this paper, we study the problem of finding a periodic attractor of a Boolean network (BN), which arises in computational systems biology and is known to be NP-hard. Since a general case is quite hard to solve, we consider special but biologically important subclasses of BNs. For finding an attractor of period 2 of a BN consisting of $n$ OR functions of positive literals, we present a polynomial time algorithm. For finding an attractor of period 2 of a BN consisting of $n$ AND/OR functions of literals, we present an $ O(1.985^n) $ time algorithm. For finding an attractor of a fixed period of a BN consisting of $n$ nested canalyzing functions and having constant treewidth $w$, we present an $ O(n^{2 p (w + 1)} \poly (n)) $ time algorithm.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Pang:2012:GSU, author = "Herbert Pang and Stephen L. 
George and Ken Hui and Tiejun Tong", title = "Gene Selection Using Iterative Feature Elimination Random Forests for Survival Outcomes", journal = j-TCBB, volume = "9", number = "5", pages = "1422--1431", month = sep, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.63", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Aug 28 17:31:04 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Although many feature selection methods for classification have been developed, there is a need to identify genes in high-dimensional data with censored survival outcomes. Traditional methods for gene selection in classification problems have several drawbacks. First, the majority of the gene selection approaches for classification are single-gene based. Second, many of the gene selection procedures are not embedded within the algorithm itself. The technique of random forests has been found to perform well in high-dimensional data settings with survival outcomes. It also has an embedded feature to identify variables of importance. Therefore, it is an ideal candidate for gene selection in high-dimensional data with survival outcomes. In this paper, we develop a novel method based on the random forests to identify a set of prognostic genes. We compare our method with several machine learning methods and various node split criteria using several real data sets. Our method performed well in both simulations and real data analysis. Additionally, we have shown the advantages of our approach over single-gene-based approaches. Our method incorporates multivariate correlations in microarray data for survival outcomes. 
The described method allows us to better utilize the information available from microarray data with survival outcomes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Karacali:2012:HMV, author = "Bilge Karacali", title = "Hierarchical Motif Vectors for Prediction of Functional Sites in Amino Acid Sequences Using Quasi-Supervised Learning", journal = j-TCBB, volume = "9", number = "5", pages = "1432--1441", month = sep, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.68", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Aug 28 17:31:04 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We propose hierarchical motif vectors to represent local amino acid sequence configurations for predicting the functional attributes of amino acid sites on a global scale in a quasi-supervised learning framework. The motif vectors are constructed via wavelet decomposition on the variations of physico-chemical amino acid properties along the sequences. We then formulate a prediction scheme for the functional attributes of amino acid sites in terms of the respective motif vectors using the quasi-supervised learning algorithm that carries out predictions for all sites in consideration using only the experimentally verified sites. We have carried out comparative performance evaluation of the proposed method on the prediction of N-glycosylation of 55,184 sites possessing the consensus N-glycosylation sequon identified over 15,104 human proteins, out of which only 1,939 were experimentally verified N-glycosylation sites. In the experiments, the proposed method achieved better predictive performance than the alternative strategies from the literature. 
In addition, the predicted N-glycosylation sites showed good agreement with existing potential annotations, while the novel predictions belonged to proteins known to be modified by glycosylation.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Hennings-Yeomans:2012:IPC, author = "Pablo H. Hennings-Yeomans and Gregory F. Cooper", title = "Improving the Prediction of Clinical Outcomes from Genomic Data Using Multiresolution Analysis", journal = j-TCBB, volume = "9", number = "5", pages = "1442--1450", month = sep, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.80", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Aug 28 17:31:04 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The prediction of patient's future clinical outcome, such as Alzheimer's and cardiac disease, using only genomic information is an open problem. In cases when genome-wide association studies (GWASs) are able to find strong associations between genomic predictors (e.g., SNPs) and disease, pattern recognition methods may be able to predict the disease well. Furthermore, by using signal processing methods, we can capitalize on latent multivariate interactions of genomic predictors. Such an approach to genomic pattern recognition for prediction of clinical outcomes is investigated in this work. In particular, we show how multiresolution transforms can be applied to genomic data to extract cues of multivariate interactions and, in some cases, improve on the predictive performance of clinical outcomes of standard classification methods. 
Our results show, for example, that an improvement of about 6 percent increase of the area under the ROC curve can be achieved using multiresolution spaces to train logistic regression to predict late-onset Alzheimer's disease (LOAD) compared to logistic regression applied directly on SNP data.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Bonnel:2012:LFP, author = "Nicolas Bonnel and Pierre-Fran{\c{c}}ois Marteau", title = "{LNA}: Fast Protein Structural Comparison Using a {Laplacian} Characterization of Tertiary Structure", journal = j-TCBB, volume = "9", number = "5", pages = "1451--1458", month = sep, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.64", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Aug 28 17:31:04 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In the last two decades, a lot of protein 3D shapes have been discovered, characterized, and made available thanks to the Protein Data Bank (PDB), that is nevertheless growing very quickly. New scalable methods are thus urgently required to search through the PDB efficiently. This paper presents an approach entitled LNA (Laplacian Norm Alignment) that performs a structural comparison of two proteins with dynamic programming algorithms. This is achieved by characterizing each residue in the protein with scalar features. The feature values are calculated using a Laplacian operator applied on the graph corresponding to the adjacency matrix of the residues. The weighted Laplacian operator we use estimates, at various scales, local deformations of the topology where each residue is located. 
On some benchmarks, which are widely shared by the community, we obtain qualitatively similar results compared to other competing approaches, but with an algorithm one or two orders of magnitude faster. 180,000 protein comparisons can be done within 1 second with a single recent Graphical Processing Unit (GPU), which makes our algorithm very scalable and suitable for real-time database querying across the web.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Sambo:2012:MMO, author = "Francesco Sambo and Marco A. {Montes de Oca} and Barbara {Di Camillo} and Gianna Toffolo and Thomas St{\"u}tzle", title = "{MORE}: Mixed Optimization for Reverse Engineering --- an Application to Modeling Biological Networks Response via Sparse Systems of Nonlinear Differential Equations", journal = j-TCBB, volume = "9", number = "5", pages = "1459--1471", month = sep, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.56", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Aug 28 17:31:04 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Reverse engineering is the problem of inferring the structure of a network of interactions between biological variables from a set of observations. In this paper, we propose an optimization algorithm, called MORE, for the reverse engineering of biological networks from time series data. The model inferred by MORE is a sparse system of nonlinear differential equations, complex enough to realistically describe the dynamics of a biological system. MORE tackles separately the discrete component of the problem, the determination of the biological network topology, and the continuous component of the problem, the strength of the interactions. 
This approach allows us both to enforce system sparsity, by globally constraining the number of edges, and to integrate a priori information about the structure of the underlying interaction network. Experimental results on simulated and real-world networks show that the mixed discrete/continuous optimization approach of MORE significantly outperforms standard continuous optimization and that MORE is competitive with the state of the art in terms of accuracy of the inferred networks.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Jacklin:2012:NCO, author = "Neil Jacklin and Zhi Ding and Wei Chen and Chunqi Chang", title = "Noniterative Convex Optimization Methods for Network Component Analysis", journal = j-TCBB, volume = "9", number = "5", pages = "1472--1481", month = sep, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.81", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Aug 28 17:31:04 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This work studies the reconstruction of gene regulatory networks by the means of network component analysis (NCA). We will expound a family of convex optimization-based methods for estimating the transcription factor control strengths and the transcription factor activities (TFAs). The approach taken in this work is to decompose the problem into a network connectivity strength estimation phase and a transcription factor activity estimation phase. In the control strength estimation phase, we formulate a new subspace-based method incorporating a choice of multiple error metrics. For the source estimation phase we propose a total least squares (TLS) formulation that generalizes many existing methods. 
Both estimation procedures are noniterative and yield the optimal estimates according to various proposed error metrics. We test the performance of the proposed algorithms on simulated data and experimental gene expression data for the yeast Saccharomyces cerevisiae and demonstrate that the proposed algorithms have superior effectiveness in comparison with both Bayesian Decomposition (BD) and our previous FastNCA approach, while the computational complexity is still orders of magnitude less than BD.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Badaloni:2012:QRB, author = "Silvana Badaloni and Barbara {Di Camillo} and Francesco Sambo", title = "Qualitative Reasoning for Biological Network Inference from Systematic Perturbation Experiments", journal = j-TCBB, volume = "9", number = "5", pages = "1482--1491", month = sep, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.69", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Aug 28 17:31:04 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The systematic perturbation of the components of a biological system has been proven among the most informative experimental setups for the identification of causal relations between the components. In this paper, we present Systematic Perturbation-Qualitative Reasoning (SPQR), a novel Qualitative Reasoning approach to automate the interpretation of the results of systematic perturbation experiments. Our method is based on a qualitative abstraction of the experimental data: for each perturbation experiment, measured values of the observed variables are modeled as lower, equal or higher than the measurements in the wild type condition, when no perturbation is applied. 
The algorithm exploits a set of IF-THEN rules to infer causal relations between the variables, analyzing the patterns of propagation of the perturbation signals through the biological network, and is specifically designed to minimize the rate of false positives among the inferred relations. Tested on both simulated and real perturbation data, SPQR indeed exhibits a significantly higher precision than the state of the art.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Hawkins:2012:RFP, author = "John C. Hawkins and Hongbo Zhu and Joan Teyra and M. Teresa Pisabarro", title = "Reduced False Positives in {PDZ} Binding Prediction Using Sequence and Structural Descriptors", journal = j-TCBB, volume = "9", number = "5", pages = "1492--1503", month = sep, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.54", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Aug 28 17:31:04 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Identifying the binding partners of proteins is a problem of fundamental importance in computational biology. The PDZ is one of the most common and well-studied protein binding domains, hence it is a perfect model system for designing protein binding predictors. The standard approach to identifying the binding partners of PDZ domains uses multiple sequence alignments to infer the set of contact residues that are used in a predictive model. We expand on the sequence alignment approach by incorporating structural information to generate descriptors of the binding site geometry. Furthermore, we generate a real-value score for binary predictions by applying a filter based on models that predict the probability distributions of contact residues at each of the canonical PDZ ligand binding positions. 
Under training cross validation, our model produced an order of magnitude more predictions at a false positive proportion (FPP) of 10 percent than our benchmark model chosen from the literature. Evaluated using an independent cross validation, with computationally predicted structures, our model was able to make five times as many predictions as the benchmark model, with a Matthews' correlation coefficient (MCC) of 0.33. In addition, our model achieved a false positive proportion of 0.14, while the benchmark model had a 0.25 false positive proportion.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Sun:2012:RBC, author = "Jianyong Sun and Jonathan M. Garibaldi and Kim Kenobi", title = "Robust {Bayesian} Clustering for Replicated Gene Expression Data", journal = j-TCBB, volume = "9", number = "5", pages = "1504--1514", month = sep, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.85", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Aug 28 17:31:04 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Experimental scientific data sets, especially biology data, usually contain replicated measurements. The replicated measurements for the same object are correlated, and this correlation must be carefully dealt with in scientific analysis. In this paper, we propose a robust Bayesian mixture model for clustering data sets with replicated measurements. The model aims not only to accurately cluster the data points taking the replicated measurements into consideration, but also to find the outliers (i.e., scattered objects) which are possibly required to be studied further. A tree-structured variational Bayes (VB) algorithm is developed to carry out model fitting. 
Experimental studies showed that our model compares favorably with the infinite Gaussian mixture model, while maintaining computational simplicity. We demonstrate the benefits of including the replicated measurements in the model, in terms of improved outlier detection rates in varying measurement uncertainty conditions. Finally, we apply the approach to clustering biological transcriptomics mRNA expression data sets with replicated measurements.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Chen:2012:SID, author = "Zhi-Zhong Chen and Fei Deng and Lusheng Wang", title = "Simultaneous Identification of Duplications, Losses, and Lateral Gene Transfers", journal = j-TCBB, volume = "9", number = "5", pages = "1515--1528", month = sep, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.79", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Aug 28 17:31:04 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We give a fixed-parameter algorithm for the problem of enumerating all minimum-cost LCA-reconciliations involving gene duplications, gene losses, and lateral gene transfers (LGTs) for a given species tree $S$ and a given gene tree $G$. Our algorithm can work for the weighted version of the problem, where the costs of a gene duplication, a gene loss, and an LGT are left to the user's discretion. The algorithm runs in $ O(m + 3^{k / c} n) $ time, where $m$ is the number of vertices in $S$, $n$ is the number of vertices in $G$, $c$ is the smaller between a gene duplication cost and an LGT cost, and $k$ is the minimum cost of an LCA-reconciliation between $S$ and $G$. The time complexity is indeed better if the cost of a gene loss is greater than 0. 
In particular, when the cost of a gene loss is at least $ 0.614 c $, the running time of the algorithm is $ O(m + 2.78^{k / c} n) $.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Liao:2012:NMS, author = "Bo Liao and Xiong Li and Wen Zhu and Zhi Cao", title = "A Novel Method to Select Informative {SNPs} and Their Application in Genetic Association Studies", journal = j-TCBB, volume = "9", number = "5", pages = "1529--1534", month = sep, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.70", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Aug 28 17:31:04 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The association studies between complex diseases and single nucleotide polymorphisms (SNPs) or haplotypes have recently received great attention. However, these studies are limited by the cost of genotyping all SNPs. Therefore, it is essential to find a small subset of tag SNPs representing the rest of the SNPs. The presence of linkage disequilibrium between tag SNPs and the disease variant (genotyped or not), may allow fine mapping study. In this paper, we combine a nearest-means classifier (NMC) and ant colony algorithm to select tags. Results show that our method (ACO/NMC) can get a similar prediction accuracy with method BPSO/SVM and is better than BPSO/STAMPA for small data sets. For large data sets, although the prediction accuracy of our method is lower than BPSO/SVM, ACO/NMC can reach a high accuracy ($ > 99 $ percent) in a relatively short time. When the number of tags increases, the time complexity of NMC is nearly linear growth. To find out that the ability of tags to locate disease locus, we simulate a case-control study and use two-locus haplotype analysis to quantitatively assess the power. 
The result showed that 20 percent of all SNPs selected by NMC have about 10 percent higher power than random tags, on average.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Joseph:2012:CPT, author = "Shaini Joseph and Shreyas Karnik and Pravin Nilawe and V. K. Jayaraman and Susan Idicula-Thomas", title = "{ClassAMP}: a Prediction Tool for Classification of Antimicrobial Peptides", journal = j-TCBB, volume = "9", number = "5", pages = "1535--1538", month = sep, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.89", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Aug 28 17:31:04 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Antimicrobial peptides (AMPs) are gaining popularity as anti-infective agents. Information on sequence features that contribute to target specificity of AMPs will aid in accelerating drug discovery programs involving them. In this study, an algorithm called ClassAMP using Random Forests (RFs) and Support Vector Machines (SVMs) has been developed to predict the propensity of a protein sequence to have antibacterial, antifungal, or antiviral activity. ClassAMP is available at \path=http://www.bicnirrh.res.in/classamp/=.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Nounou:2012:MDB, author = "M. N. Nounou and H. N. Nounou and N. Meskin and A. Datta and E. R. 
Dougherty", title = "Multiscale Denoising of Biological Data: a Comparative Analysis", journal = j-TCBB, volume = "9", number = "5", pages = "1539--1545", month = sep, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.67", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Aug 28 17:31:04 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Measured microarray genomic and metabolic data are a rich source of information about the biological systems they represent. For example, time-series biological data can be used to construct dynamic genetic regulatory network models, which can be used to design intervention strategies to cure or manage major diseases. Also, copy number data can be used to determine the locations and extent of aberrations in chromosome sequences. Unfortunately, measured biological data are usually contaminated with errors that mask the important features in the data. Therefore, these noisy measurements need to be filtered to enhance their usefulness in practice. Wavelet-based multiscale filtering has been shown to be a powerful denoising tool. In this work, different batch as well as online multiscale filtering techniques are used to denoise biological data contaminated with white or colored noise. The performances of these techniques are demonstrated and compared to those of some conventional low-pass filters using two case studies. The first case study uses simulated dynamic metabolic data, while the second case study uses real copy number data. 
Simulation results show that significant improvement can be achieved using multiscale filtering over conventional filtering techniques.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Margaliot:2012:SAR, author = "Michael Margaliot and Tamir Tuller", title = "Stability Analysis of the Ribosome Flow Model", journal = j-TCBB, volume = "9", number = "5", pages = "1545--1552", month = sep, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.88", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Aug 28 17:31:04 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Gene translation is a central process in all living organisms. Developing a better understanding of this complex process may have ramifications to almost every biomedical discipline. Recently, Reuveni et al. proposed a new computational model of this process called the ribosome flow model (RFM). In this study, we show that the dynamical behavior of the RFM is relatively simple. There exists a unique equilibrium point $e$ and every trajectory converges to $e$. Furthermore, convergence is monotone in the sense that the distance to $e$ can never increase. This qualitative behavior is maintained for any feasible set of parameter values, suggesting that the RFM is highly robust. Our analysis is based on a contraction principle and the theory of monotone dynamical systems. 
These analysis tools may prove useful in studying other properties of the RFM as well as additional intracellular biological processes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Sagot:2012:EE, author = "Marie-France Sagot", title = "{EIC} Editorial", journal = j-TCBB, volume = "9", number = "6", pages = "1553--1557", month = nov, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.155", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 19 17:33:56 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Degnan:2012:CSS, author = "James H. Degnan and Noah A. Rosenberg and Tanja Stadler", title = "A Characterization of the Set of Species Trees that Produce Anomalous Ranked Gene Trees", journal = j-TCBB, volume = "9", number = "6", pages = "1558--1568", month = nov, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.110", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 19 17:33:56 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Ranked gene trees, which consider both the gene tree topology and the sequence in which gene lineages separate, can potentially provide a new source of information for use in modeling genealogies and performing inference of species trees. 
Recently, we have calculated the probability distribution of ranked gene trees under the standard multispecies coalescent model for the evolution of gene lineages along the branches of a fixed species tree, demonstrating the existence of anomalous ranked gene trees (ARGTs), in which a ranked gene tree that does not match the ranked species tree can have greater probability under the model than the matching ranked gene tree. Here, we fully characterize the set of unranked species tree topologies that give rise to ARGTs, showing that this set contains all species tree topologies with five or more taxa, with the exceptions of caterpillars and pseudocaterpillars. The results have implications for the use of ranked gene trees in phylogenetic inference.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Tang:2012:CEC, author = "Yang Tang and Zidong Wang and Huijun Gao and Stephen Swift and J{\"u}rgen Kurths", title = "A Constrained Evolutionary Computation Method for Detecting Controlling Regions of Cortical Networks", journal = j-TCBB, volume = "9", number = "6", pages = "1569--1581", month = nov, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.124", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 19 17:33:56 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Controlling regions in cortical networks, which serve as key nodes to control the dynamics of networks to a desired state, can be detected by minimizing the eigenratio R and the maximum imaginary part $ \sigma $ of an extended connection matrix. Until now, optimal selection of the set of controlling regions is still an open problem and this paper represents the first attempt to include two measures of controllability into one unified framework. 
The detection problem of controlling regions in cortical networks is converted into a constrained optimization problem (COP), where the objective function R is minimized and $ \sigma $ is regarded as a constraint. Then, the detection of controlling regions of a weighted and directed complex network (e.g., a cortical network of a cat), is thoroughly investigated. The controlling regions of cortical networks are successfully detected by means of an improved dynamic hybrid framework (IDyHF). Our experiments verify that the proposed IDyHF outperforms two recently developed evolutionary computation methods in constrained optimization field and some traditional methods in control theory as well as graph theory. Based on the IDyHF, the controlling regions are detected in a microscopic and macroscopic way. Our results unveil the dependence of controlling regions on the number of driver nodes $l$ and the constraint $r$. The controlling regions are largely selected from the regions with a large in-degree and a small out-degree. When $ r = + \infty $, there exists a concave shape of the mean degrees of the driver nodes, i.e., the regions with a large degree are of great importance to the control of the networks when $l$ is small and the regions with a small degree are helpful to control the networks when $l$ increases. When $ r = 0 $, the mean degrees of the driver nodes increase as a function of $l$. We find that controlling $ \sigma $ is becoming more important in controlling a cortical network with increasing $l$. 
The methods and results of detecting controlling regions in this paper would promote the coordination and information consensus of various kinds of real-world complex networks including transportation networks, genetic regulatory networks, and social networks, etc.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Pirola:2012:FPA, author = "Yuri Pirola and Gianluca Della Vedova and Stefano Biffani and Alessandra Stella and Paola Bonizzoni", title = "A Fast and Practical Approach to Genotype Phasing and Imputation on a Pedigree with Erroneous and Incomplete Information", journal = j-TCBB, volume = "9", number = "6", pages = "1582--1594", month = nov, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.100", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 19 17:33:56 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The MINIMUM-RECOMBINANT HAPLOTYPE CONFIGURATION problem (MRHC) has been highly successful in providing a sound combinatorial formulation for the important problem of genotype phasing on pedigrees. Despite several algorithmic advances that have improved the efficiency, its applicability to real data sets has been limited since it does not take into account some important phenomena such as mutations, genotyping errors, and missing data. In this work, we propose the MINIMUM-RECOMBINANT HAPLOTYPE CONFIGURATION WITH BOUNDED ERRORS problem (MRHCE), which extends the original MRHC formulation by incorporating the two most common characteristics of real data: errors and missing genotypes (including untyped individuals). We describe a practical algorithm for MRHCE that is based on a reduction to the well-known Satisfiability problem (SAT) and exploits recent advances in the constraint programming literature. 
An experimental analysis demonstrates the biological soundness of the phasing model and the effectiveness (on both accuracy and performance) of the algorithm under several scenarios. The analysis on real data and the comparison with state-of-the-art programs reveals that our approach couples better scalability to large and complex pedigrees with the explicit inclusion of genotyping errors into the model.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Kazmi:2012:HCA, author = "N. Kazmi and M. A. Hossain and R. M. Phillips", title = "A Hybrid Cellular Automaton Model of Solid Tumor Growth and Bioreductive Drug Transport", journal = j-TCBB, volume = "9", number = "6", pages = "1595--1606", month = nov, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.118", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 19 17:33:56 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Bioreductive drugs are a class of hypoxia selective drugs that are designed to eradicate the hypoxic fraction of solid tumors. Their activity depends upon a number of biological and pharmacological factors and we used a mathematical modeling approach to explore the dynamics of tumor growth, infusion, and penetration of the bioreductive drug Tirapazamine (TPZ). An in-silico model is implemented to calculate the tumor mass considering oxygen and glucose as key microenvironmental parameters. The next stage of the model integrated extra cellular matrix (ECM), cell-cell adhesion, and cell movement parameters as growth constraints. The tumor microenvironments strongly influenced tumor morphology and growth rates. Once the growth model was established, a hybrid model was developed to study drug dynamics inside the hypoxic regions of tumors. 
The model used 10, 50 and 100 $ \mu ${\rm M} as TPZ initial concentrations and determined TPZ pharmacokinetic (PK) (transport) and pharmacodynamics (cytotoxicity) properties inside hypoxic regions of solid tumor. The model results showed that diminished drug transport is a reason for TPZ failure and recommend the optimization of the drug transport properties in the emerging TPZ generations. The modeling approach used in this study is novel and can be a step to explore the behavioral dynamics of TPZ.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Abate:2012:MMS, author = "Alessandro Abate and Stephane Vincent and Roel Dobbe and Alberto Silletti and Neal Master and Jeffrey D. Axelrod and Claire J. Tomlin", title = "A Mathematical Model to Study the Dynamics of Epithelial Cellular Networks", journal = j-TCBB, volume = "9", number = "6", pages = "1607--1620", month = nov, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.126", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 19 17:33:56 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Epithelia are sheets of connected cells that are essential across the animal kingdom. Experimental observations suggest that the dynamical behavior of many single-layered epithelial tissues has strong analogies with that of specific mechanical systems, namely large networks consisting of point masses connected through spring-damper elements and undergoing the influence of active and dissipating forces. Based on this analogy, this work develops a modeling framework to enable the study of the mechanical properties and of the dynamic behavior of large epithelial cellular networks. 
The model is built first by creating a network topology that is extracted from the actual cellular geometry as obtained from experiments, then by associating a mechanical structure and dynamics to the network via spring-damper elements. This scalable approach enables running simulations of large network dynamics: the derived modeling framework in particular is predisposed to be tailored to study general dynamics (for example, morphogenesis) of various classes of single-layered epithelial cellular networks. In this contribution, we test the model on a case study of the dorsal epithelium of the Drosophila melanogaster embryo during early dorsal closure (and, less conspicuously, germband retraction).", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Cavuslar:2012:TSA, author = "Gizem Cavuslar and Bulent Catay and Mehmet Serkan Apaydin", title = "A Tabu Search Approach for the {NMR} Protein Structure-Based Assignment Problem", journal = j-TCBB, volume = "9", number = "6", pages = "1621--1628", month = nov, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.122", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 19 17:33:56 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ma:2012:EAA, author = "Christopher Ma and Thomas K. F. Wong and T. W. Lam and W. K. Hon and K. Sadakane and S. M. 
Yiu", title = "An Efficient Alignment Algorithm for Searching Simple Pseudoknots over Long Genomic Sequence", journal = j-TCBB, volume = "9", number = "6", pages = "1629--1638", month = nov, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.104", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 19 17:33:56 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Structural alignment has been shown to be an effective computational method to identify structural noncoding RNA (ncRNA) candidates as ncRNAs are known to be conserved in secondary structures. However, the complexity of the structural alignment algorithms becomes higher when the structure has pseudoknots. Even for the simplest type of pseudoknots (simple pseudoknots), the fastest algorithm runs in $ O(m n^3) $ time, where $m$, $n$ are the length of the query ncRNA (with known structure) and the length of the target sequence (with unknown structure), respectively. In practice, we are usually given a long DNA sequence and we try to locate regions in the sequence for possible candidates of a particular ncRNA. Thus, we need to run the structural alignment algorithm on every possible region in the long sequence. For example, finding candidates for a known ncRNA of length 100 on a sequence of length 50,000, it takes more than one day. In this paper, we provide an efficient algorithm to solve the problem for simple pseudoknots and it is shown to be 10 times faster. 
The speedup stems from an effective pruning strategy consisting of the computation of a lower bound score for the optimal alignment and an estimation of the maximum score that a candidate can achieve to decide whether to prune the current candidate or not.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ozyurt:2012:AIC, author = "I. Burak Ozyurt", title = "Automatic Identification and Classification of Noun Argument Structures in Biomedical Literature", journal = j-TCBB, volume = "9", number = "6", pages = "1639--1648", month = nov, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.111", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 19 17:33:56 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The accelerating increase in the biomedical literature makes keeping up with recent advances challenging for researchers thus making automatic extraction and discovery of knowledge from this vast literature a necessity. Building such systems requires automatic detection of lexico-semantic event structures governed by the syntactic and semantic constraints of human languages in sentences of biomedical texts. The lexico-semantic event structures in sentences are centered around the predicates and most semantic role labeling (SRL) approaches focus only on the arguments of verb predicates and neglect argument taking nouns which also convey information in a sentence. In this article, a noun argument structure (NAS) annotated corpus named BioNom and a SRL system to identify and classify these structures is introduced. 
Also, a genetic algorithm-based feature selection (GAFS) method is introduced and global inference is applied to significantly improve the performance of the NAS Bio SRL system.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wu:2012:BIC, author = "Meng-Yun Wu and Dao-Qing Dai and Yu Shi and Hong Yan and Xiao-Fei Zhang", title = "Biomarker Identification and Cancer Classification Based on Microarray Data Using {Laplace} Naive {Bayes} Model with Mean Shrinkage", journal = j-TCBB, volume = "9", number = "6", pages = "1649--1662", month = nov, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.105", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 19 17:33:56 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Biomarker identification and cancer classification are two closely related problems. In gene expression data sets, the correlation between genes can be high when they share the same biological pathway. Moreover, the gene expression data sets may contain outliers due to either chemical or electrical reasons. A good gene selection method should take group effects into account and be robust to outliers. In this paper, we propose a Laplace naive Bayes model with mean shrinkage (LNB-MS). The Laplace distribution instead of the normal distribution is used as the conditional distribution of the samples for the reasons that it is less sensitive to outliers and has been applied in many fields. The key technique is the $ L_1 $ penalty imposed on the mean of each class to achieve automatic feature selection. The objective function of the proposed model is a piecewise linear function with respect to the mean of each class, of which the optimal value can be evaluated at the breakpoints simply. 
An efficient algorithm is designed to estimate the parameters in the model. A new strategy that uses the number of selected features to control the regularization parameter is introduced. Experimental results on simulated data sets and 17 publicly available cancer data sets attest to the accuracy, sparsity, efficiency, and robustness of the proposed algorithm. Many biomarkers identified with our method have been verified in biochemical or biomedical research. The analysis of biological and functional correlation of the genes based on Gene Ontology (GO) terms shows that the proposed method guarantees the selection of highly correlated genes simultaneously.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Irsoy:2012:DAC, author = "Ozan Irsoy and Olcay Taner Yildiz and Ethem Alpaydin", title = "Design and Analysis of Classifier Learning Experiments in Bioinformatics: Survey and Case Studies", journal = j-TCBB, volume = "9", number = "6", pages = "1663--1675", month = nov, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.117", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 19 17:33:56 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In many bioinformatics applications, it is important to assess and compare the performances of algorithms trained from data, to be able to draw conclusions unaffected by chance and are therefore significant. Both the design of such experiments and the analysis of the resulting data using statistical tests should be done carefully for the results to carry significance. In this paper, we first review the performance measures used in classification, the basics of experiment design and statistical tests. 
We then give the results of our survey over 1,500 papers published in the last two years in three bioinformatics journals (including this one). Although the basics of experiment design are well understood, such as resampling instead of using a single training set and the use of different performance metrics instead of error, only 21 percent of the papers use any statistical test for comparison. In the third part, we analyze four different scenarios which we encounter frequently in the bioinformatics literature, discussing the proper statistical methodology as well as showing an example case study for each. With the supplementary software, we hope that the guidelines we discuss will play an important role in future studies.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ashlock:2012:DER, author = "Wendy Ashlock and Suprakash Datta", title = "Distinguishing Endogenous Retroviral {LTRs} from {SINE} Elements Using Features Extracted from Evolved Side Effect Machines", journal = j-TCBB, volume = "9", number = "6", pages = "1676--1689", month = nov, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.116", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 19 17:33:56 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Side effect machines produce features for classifiers that distinguish different types of DNA sequences. They have the, as yet unexploited, potential to give insight into biological features of the sequences. We introduce several innovations to the production and use of side effect machine sequence features. We compare the results of using consensus sequences and genomic sequences for training classifiers and find that more accurate results can be obtained using genomic sequences. 
Surprisingly, we were even able to build a classifier that distinguished consensus sequences from genomic sequences with high accuracy, suggesting that consensus sequences are not always representative of their genomic counterparts. We apply our techniques to the problem of distinguishing two types of transposable elements, solo LTRs and SINEs. Identifying these sequences is important because they affect gene expression, genome structure, and genetic diversity, and they serve as genetic markers. They are of similar length, neither codes for protein, and both have many nearly identical copies throughout the genome. Being able to efficiently and automatically distinguish them will aid efforts to improve annotations of genomes. Our approach reveals structural characteristics of the sequences of potential interest to biologists.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Tsai:2012:IPP, author = "Richard Tzong-Han Tsai", title = "Improving Protein-Protein Interaction Pair Ranking with an Integrated Global Association Score", journal = j-TCBB, volume = "9", number = "6", pages = "1690--1695", month = nov, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.99", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 19 17:33:56 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Protein-protein interaction (PPI) database curation requires text-mining systems that can recognize and normalize interactor genes and return a ranked list of PPI pairs for each article. The order of PPI pairs in this list is essential for ease of curation. Most of the current PPI pair ranking approaches rely on association analysis between the two genes in the pair. 
However, we propose that ranking an extracted PPI pair by considering both the association between the paired genes and each of those genes' global associations with all other genes mentioned in the paper can provide a more reliable ranked list. In this work, we present a composite interaction score that considers not only the association score between two interactors (pair association score) but also their global association scores. We test three representative data fusion algorithms to estimate this global association score---two Borda-Fuse models and one linear combination model (LCM). The three estimation methods are evaluated using the data set of the BioCreative II.5 Interaction Pair Task (IPT) in terms of area under the interpolated precision/recall curve (AUC iP/R). Our experimental results indicate that using LCM to estimate the global association score can boost the AUC iP/R score from 0.0175 to 0.2396, outperforming the best BioCreative II.5 IPT system.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Hashemikhabir:2012:LSS, author = "Seyedsasan Hashemikhabir and Eyup Serdar Ayaz and Yusuf Kavurucu and Tolga Can and Tamer Kahveci", title = "Large-Scale Signaling Network Reconstruction", journal = j-TCBB, volume = "9", number = "6", pages = "1696--1708", month = nov, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.128", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 19 17:33:56 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Reconstructing the topology of a signaling network by means of RNA interference (RNAi) technology is an underdetermined problem especially when a single gene in the network is knocked down or observed.
In addition, the exponential search space limits the existing methods to small signaling networks of size 10--15 genes. In this paper, we propose integrating RNAi data with a reference physical interaction network. We formulate the problem of signaling network reconstruction as finding the minimum number of edit operations on a given reference network. The edit operations transform the reference network to a network that satisfies the RNAi observations. We show that using a reference network does not simplify the computational complexity of the problem. Therefore, we propose two methods which provide near optimal results and can scale well for reconstructing networks up to hundreds of components. We validate the proposed methods on synthetic and real data sets. Comparison with the state of the art on real signaling networks shows that the proposed methodology can scale better and generates biologically significant results.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Sargsyan:2012:MSR, author = "Khachik Sargsyan and Cosmin Safta and Bert Debusschere and Habib Najm", title = "Multiparameter Spectral Representation of Noise-Induced Competence in \bioname{Bacillus subtilis}", journal = j-TCBB, volume = "9", number = "6", pages = "1709--1723", month = nov, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.107", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 19 17:33:56 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In this work, the problem of representing a stochastic forward model output with respect to a large number of input parameters is considered. The methodology is applied to a stochastic reaction network of competence dynamics in Bacillus subtilis bacterium.
In particular, the dependence of the competence state on rate constants of underlying reactions is investigated. We base our methodology on Polynomial Chaos (PC) spectral expansions that allow effective propagation of input parameter uncertainties to outputs of interest. Given a number of forward model training runs at sampled input parameter values, the PC modes are estimated using a Bayesian framework. As an outcome, these PC modes are described with posterior probability distributions. The resulting expansion can be regarded as an uncertain response function and can further be used as a computationally inexpensive surrogate instead of the original reaction model for subsequent analyses such as calibration or optimization studies. Furthermore, the methodology is enhanced with a classification-based mixture PC formulation that overcomes the difficulties associated with representing potentially nonsmooth input-output relationships. Finally, the global sensitivity analysis based on the multiparameter spectral representation of an observable of interest provides biological insight and reveals the most important reactions and their couplings for the competence dynamics.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Margaliot:2012:SSD, author = "Michael Margaliot and Tamir Tuller", title = "On the Steady-State Distribution in the Homogeneous Ribosome Flow Model", journal = j-TCBB, volume = "9", number = "6", pages = "1724--1736", month = nov, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.120", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 19 17:33:56 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A central biological process in all living organisms is gene translation. 
Developing a deeper understanding of this complex process may have ramifications to almost every biomedical discipline. Reuveni et al. recently proposed a new computational model of gene translation called the Ribosome Flow Model (RFM). In this paper, we consider a particular case of this model, called the Homogeneous Ribosome Flow Model (HRFM). From a biological viewpoint, this corresponds to the case where the transition rates of all the coding sequence codons are identical. This regime has been suggested recently based on experiments in mouse embryonic cells. We consider the steady-state distribution of the HRFM. We provide formulas that relate the different parameters of the model in steady state. We prove the following properties: (1) the ribosomal density profile is monotonically decreasing along the coding sequence; (2) the ribosomal density at each codon monotonically increases with the initiation rate; and (3) for a constant initiation rate, the translation rate monotonically decreases with the length of the coding sequence. In addition, we analyze the translation rate of the HRFM at the limit of very high and very low initiation rate, and provide explicit formulas for the translation rate in these two cases. 
We discuss the relationship between these theoretical results and biological findings on the translation process.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Marschall:2012:PAA, author = "Tobias Marschall and Inke Herms and Hans-Michael Kaltenbach and Sven Rahmann", title = "Probabilistic Arithmetic Automata and Their Applications", journal = j-TCBB, volume = "9", number = "6", pages = "1737--1750", month = nov, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.109", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 19 17:33:56 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We present a comprehensive review on probabilistic arithmetic automata (PAAs), a general model to describe chains of operations whose operands depend on chance, along with two algorithms to numerically compute the distribution of the results of such probabilistic calculations. PAAs provide a unifying framework to approach many problems arising in computational biology and elsewhere. We present five different applications, namely (1) pattern matching statistics on random texts, including the computation of the distribution of occurrence counts, waiting times, and clump sizes under hidden Markov background models; (2) exact analysis of window-based pattern matching algorithms; (3) sensitivity of filtration seeds used to detect candidate sequence alignments; (4) length and mass statistics of peptide fragments resulting from enzymatic cleavage reactions; and (5) read length statistics of 454 and IonTorrent sequencing reads. The diversity of these applications indicates the flexibility and unifying character of the presented framework. 
While the construction of a PAA depends on the particular application, we single out a frequently applicable construction method: We introduce deterministic arithmetic automata (DAAs) to model deterministic calculations on sequences, and demonstrate how to construct a PAA from a given DAA and a finite-memory random text model. This procedure is used for all five discussed applications and greatly simplifies the construction of PAAs. Implementations are available as part of the MoSDi package. Its application programming interface facilitates the rapid development of new applications based on the PAA framework.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Yu:2012:STS, author = "Zhiwen Yu and Le Li and Jane You and Hau-San Wong and Guoqiang Han", title = "{SC$^3$}: Triple Spectral Clustering-Based Consensus {Clustering} Framework for Class Discovery from Cancer Gene Expression Profiles", journal = j-TCBB, volume = "9", number = "6", pages = "1751--1765", month = nov, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.108", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 19 17:33:56 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In order to perform successful diagnosis and treatment of cancer, discovering, and classifying cancer types correctly is essential. One of the challenging properties of class discovery from cancer data sets is that cancer gene expression profiles not only include a large number of genes, but also contains a lot of noisy genes. 
In order to reduce the effect of noisy genes in cancer gene expression profiles, we propose two new consensus clustering frameworks, named as triple spectral clustering-based consensus clustering (SC$^3$) and double spectral clustering-based consensus clustering (SC$^2$Ncut) in this paper, for cancer discovery from gene expression profiles. SC$^3$ integrates the spectral clustering (SC) algorithm multiple times into the ensemble framework to process gene expression profiles. Specifically, spectral clustering is applied to perform clustering on the gene dimension and the cancer sample dimension, and also used as the consensus function to partition the consensus matrix constructed from multiple clustering solutions. Compared with SC$^3$, SC$^2$Ncut adopts the normalized cut algorithm, instead of spectral clustering, as the consensus function. Experiments on both synthetic data sets and real cancer gene expression profiles illustrate that the proposed approaches not only achieve good performance on gene expression profiles, but also outperforms most of the existing approaches in the process of class discovery from these profiles.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ma:2012:SBP, author = "Xin Ma and Jing Guo and Hong-De Liu and Jian-Ming Xie and Xiao Sun", title = "Sequence-Based Prediction of {DNA}-Binding Residues in Proteins with Conservation and Correlation Information", journal = j-TCBB, volume = "9", number = "6", pages = "1766--1775", month = nov, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.106", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 19 17:33:56 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and
Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wang:2012:SCM, author = "Jianxin Wang and Yuannan Huang and Fang-Xiang Wu and Yi Pan", title = "Symmetry Compression Method for Discovering Network Motifs", journal = j-TCBB, volume = "9", number = "6", pages = "1776--1789", month = nov, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.119", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 19 17:33:56 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Discovering network motifs could provide a significant insight into systems biology. Interestingly, many biological networks have been found to have a high degree of symmetry (automorphism), which is inherent in biological network topologies. The symmetry due to the large number of basic symmetric subgraphs (BSSs) causes a certain redundant calculation in discovering network motifs. Therefore, we compress all basic symmetric subgraphs before extracting compressed subgraphs and propose an efficient decompression algorithm to decompress all compressed subgraphs without loss of any information. In contrast to previous approaches, the novel Symmetry Compression method for Motif Detection, named as SCMD, eliminates most redundant calculations caused by widespread symmetry of biological networks. We use SCMD to improve three notable exact algorithms and two efficient sampling algorithms. Results of all exact algorithms with SCMD are the same as those of the original algorithms, since SCMD is a lossless method. The sampling results show that the use of SCMD almost does not affect the quality of sampling results. For highly symmetric networks, we find that SCMD used in both exact and sampling algorithms can help get a remarkable speedup. 
Furthermore, SCMD enables us to find larger motifs in biological networks with notable symmetry than previously possible.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Amin:2012:TSG, author = "Mohammad Shafkat Amin and Russell L. {Finley, Jr.} and Hasan M. Jamil", title = "Top-$k$ Similar Graph Matching Using {TraM} in Biological Networks", journal = j-TCBB, volume = "9", number = "6", pages = "1790--1804", month = nov, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.90", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 19 17:33:56 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Many emerging database applications entail sophisticated graph-based query manipulation, predominantly evident in large-scale scientific applications. To access the information embedded in graphs, efficient graph matching tools and algorithms have become of prime importance. Although the prohibitively expensive time complexity associated with exact subgraph isomorphism techniques has limited its efficacy in the application domain, approximate yet efficient graph matching techniques have received much attention due to their pragmatic applicability. Since public domain databases are noisy and incomplete in nature, inexact graph matching techniques have proven to be more promising in terms of inferring knowledge from numerous structural data repositories. In this paper, we propose a novel technique called TraM for approximate graph matching that off-loads a significant amount of its processing on to the database making the approach viable for large graphs. Moreover, the vector space embedding of the graphs and efficient filtration of the search space enables computation of approximate graph similarity at a throw-away cost. 
We annotate nodes of the query graphs by means of their global topological properties and compare them with neighborhood biased segments of the data-graph for proper matches. We have conducted experiments on several real data sets, and have demonstrated the effectiveness and efficiency of the proposed method.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Dwight:2012:UWB, author = "Zachary L. Dwight and Robert Palais and Carl T. Wittwer", title = "{uAnalyze}: {Web}-Based High-Resolution {DNA} Melting Analysis with Comparison to Thermodynamic Predictions", journal = j-TCBB, volume = "9", number = "6", pages = "1805--1811", month = nov, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.112", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 19 17:33:56 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Re:2012:FRA, author = "Matteo Re and Marco Mesiti and Giorgio Valentini", title = "A Fast Ranking Algorithm for Predicting Gene Functions in Biomolecular Networks", journal = j-TCBB, volume = "9", number = "6", pages = "1812--1818", month = nov, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.114", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 19 17:33:56 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Ranking genes in functional networks according to a specific biological function is a challenging task raising relevant performance and computational complexity problems.
To cope with both these problems we developed a transductive gene ranking method based on kernelized score functions able to fully exploit the topology and the graph structure of biomolecular networks and to capture significant functional relationships between genes. We run the method on a network constructed by integrating multiple biomolecular data sources in the yeast model organism, achieving significantly better results than the compared state-of-the-art network-based algorithms for gene function prediction, and with relevant savings in computational time. The proposed approach is general and fast enough to be in perspective applied to other relevant node ranking problems in large and complex biological networks.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Nounou:2012:FIB, author = "Hazem N. Nounou and Mohamed N. Nounou and Nader Meskin and Aniruddha Datta and Edward R. Dougherty", title = "Fuzzy Intervention in Biological Phenomena", journal = j-TCBB, volume = "9", number = "6", pages = "1819--1825", month = nov, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.113", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 19 17:33:56 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "An important objective of modeling biological phenomena is to develop therapeutic intervention strategies to move an undesirable state of a diseased network toward a more desirable one. Such transitions can be achieved by the use of drugs to act on some genes/metabolites that affect the undesirable behavior. 
Due to the fact that biological phenomena are complex processes with nonlinear dynamics that are impossible to perfectly represent with a mathematical model, the need for model-free nonlinear intervention strategies that are capable of guiding the target variables to their desired values often arises. In many applications, fuzzy systems have been found to be very useful for parameter estimation, model development and control design of nonlinear processes. In this paper, a model-free fuzzy intervention strategy (that does not require a mathematical model of the biological phenomenon) is proposed to guide the target variables of biological systems to their desired values. The proposed fuzzy intervention strategy is applied to three different biological models: a glycolytic-glycogenolytic pathway model, a purine metabolism pathway model, and a generic pathway model. The simulation results for all models demonstrate the effectiveness of the proposed scheme.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Papadakis:2012:SSD, author = "George Papadakis and Electra Gizeli", title = "In Silico Search of {DNA} Drugs Targeting Oncogenes", journal = j-TCBB, volume = "9", number = "6", pages = "1826--1830", month = nov, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.127", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 19 17:33:56 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Triplex forming oligonucleotides (TFOs) represent a class of drug candidates for antigene therapy. 
Based on strict criteria, we investigated the potential of 25 known oncogenes to be regulated by TFOs in the mRNA synthesis level and we report specific target sequences found in seven of these genes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Bicego:2012:ITM, author = "Manuele Bicego and Pietro Lovato and Alessandro Perina and Marianna Fasoli and Massimo Delledonne and Mario Pezzotti and Annalisa Polverari and Vittorio Murino", title = "Investigating Topic Models' Capabilities in Expression Microarray Data Classification", journal = j-TCBB, volume = "9", number = "6", pages = "1831--1836", month = nov, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.121", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 19 17:33:56 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In recent years a particular class of probabilistic graphical models---called topic models---has proven to represent a useful and interpretable tool for understanding and mining microarray data. In this context, such models have been almost only applied in the clustering scenario, whereas the classification task has been disregarded by researchers. In this paper, we thoroughly investigate the use of topic models for classification of microarray data, starting from ideas proposed in other fields (e.g., computer vision).
A classification scheme is proposed, based on highly interpretable features extracted from topic models, resulting in a hybrid generative-discriminative approach; an extensive experimental evaluation, involving 10 different literature benchmarks, confirms the suitability of the topic models for classifying expression microarray data.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Grassi:2012:KSP, author = "Elena Grassi and Federico {Di Gregorio} and Ivan Molineris", title = "{KungFQ}: a Simple and Powerful Approach to Compress {{\tt fastq}} Files", journal = j-TCBB, volume = "9", number = "6", pages = "1837--1842", month = nov, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.123", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 19 17:33:56 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Nowadays storing data derived from deep sequencing experiments has become pivotal and standard compression algorithms do not exploit in a satisfying manner their structure. A number of reference-based compression algorithms have been developed but they are less adequate when approaching new species without fully sequenced genomes or nongenomic data. We developed a tool that takes advantages of {\tt fastq} characteristics and encodes them in a binary format optimized in order to be further compressed with standard tools (such as {\tt gzip} or {\tt lzma}). The algorithm is straightforward and does not need any external reference file, it scans the {\tt fastq} only once and has a constant memory requirement. 
Moreover, we added the possibility to perform lossy compression, losing some of the original information (IDs and/or qualities) but resulting in smaller files; it is also possible to define a quality cutoff under which corresponding base calls are converted to $N$. We achieve 2.82 to 7.77 compression ratios on various {\tt fastq} files without losing information and 5.37 to 8.77 losing IDs, which are often not used in common analysis pipelines. In this paper, we compare the algorithm performance with known tools, usually obtaining higher compression levels.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Boucher:2012:HCS, author = "Christina Boucher and Mohamed Omar", title = "On the Hardness of Counting and Sampling Center Strings", journal = j-TCBB, volume = "9", number = "6", pages = "1843--1846", month = nov, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.84", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 19 17:33:56 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Given a set $S$ of $n$ strings, each of length $ \ell $, and a nonnegative value $d$, we define a center string as a string of length $ \ell $ that has Hamming distance at most $d$ from each string in $S$. The \#{\rm CLOSEST STRING} problem aims to determine the number of center strings for a given set of strings $S$ and input parameters $n$, $ \ell $, and $d$. We show \#{\rm CLOSEST STRING} is impossible to solve exactly or even approximately in polynomial time, and that restricting \#{\rm CLOSEST STRING} so that any one of the parameters $n$, $ \ell $, or $d$ is fixed leads to a fully polynomial-time randomized approximation scheme (FPRAS). 
We show equivalent results for the problem of efficiently sampling center strings uniformly at random (u.a.r.).", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Gyorffy:2012:PMR, author = "Daniel Gyorffy and Peter Zavodszky and Andras Szilagyi", title = "``Pull Moves'' for Rectangular Lattice Polymer Models Are Not Fully Reversible", journal = j-TCBB, volume = "9", number = "6", pages = "1847--1849", month = nov, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.129", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 19 17:33:56 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "``Pull moves'' is a popular move set for lattice polymer model simulations. We show that the proof given for its reversibility earlier is flawed, and some moves are irreversible, which leads to biases in the parameters estimated from the simulations. We show how to make the move set fully reversible.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Campello:2012:SMA, author = "Ricardo J. G. B. 
Campello and Davoud Moulavi and Joerg Sander", title = "A Simpler and More Accurate {AUTO-HDS} Framework for Clustering and Visualization of Biological Data", journal = j-TCBB, volume = "9", number = "6", pages = "1850--1852", month = nov, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.115", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 19 17:33:56 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A framework for automated clustering and visualization of biological data sets, named AUTO-HDS, was proposed in earlier work. This letter is intended to complement that framework by showing that it is possible to get rid of a user-defined parameter in a way that the clustering stage can be implemented more accurately while having reduced computational complexity.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Handoko:2012:EQA, author = "Stephanus Daniel Handoko and Xuchang Ouyang and Chinh Tran To Su and Chee Keong Kwoh and Yew Soon Ong", title = "Erratum to {``QuickVina: Accelerating AutoDock Vina Using Gradient-Based Heuristics for Global Optimization''}", journal = j-TCBB, volume = "9", number = "6", pages = "1853", month = nov, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.156", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 19 17:33:56 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Anonymous:2012:CPS, author = "Anonymous", title = "Call for Papers: Special Issue on `-Omics' Based
Companion Diagnostics for Personalized Medicine", journal = j-TCBB, volume = "9", number = "6", pages = "1855--1855", month = nov, year = "2012", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.150", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 19 17:33:56 MST 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Prospective authors are requested to submit new, unpublished manuscripts for inclusion in the upcoming event described in this call for papers.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Xu:2013:ENE, author = "Ying Xu", title = "Editorial from the New {Editor-in-Chief}", journal = j-TCBB, volume = "10", number = "1", pages = "1--1", month = jan, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.56", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 10 07:28:56 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ray:2013:RSS, author = "Shubhra Sankar Ray and Sankar K. Pal", title = "{RNA} Secondary Structure Prediction Using Soft Computing", journal = j-TCBB, volume = "10", number = "1", pages = "2--17", month = jan, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.159", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 10 07:28:56 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Prediction of RNA structure is invaluable in creating new drugs and understanding genetic diseases.
Several deterministic algorithms and soft computing-based techniques have been developed for more than a decade to determine the structure from a known RNA sequence. Soft computing gained importance with the need to get approximate solutions for RNA sequences by considering the issues related with kinetic effects, cotranscriptional folding, and estimation of certain energy parameters. A brief description of some of the soft computing-based techniques, developed for RNA secondary structure prediction, is presented along with their relevance. The basic concepts of RNA and its different structural elements like helix, bulge, hairpin loop, internal loop, and multiloop are described. These are followed by different methodologies, employing genetic algorithms, artificial neural networks, and fuzzy logic. The role of various metaheuristics, like simulated annealing, particle swarm optimization, ant colony optimization, and tabu search is also discussed. A relative comparison among different techniques, in predicting 12 known RNA secondary structures, is presented, as an example. 
Future challenging issues are then mentioned.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Piovesan:2013:SFP, author = "Teresa Piovesan and Steven Kelk", title = "A Simple Fixed Parameter Tractable Algorithm for Computing the Hybridization Number of Two (Not Necessarily Binary) Trees", journal = j-TCBB, volume = "10", number = "1", pages = "18--25", month = jan, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.134", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 10 07:28:56 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Here, we present a new fixed parameter tractable algorithm to compute the hybridization number $ (r) $ of two rooted, not necessarily binary phylogenetic trees on taxon set {$ ({{\cal X}}) $} in time $ ((6^r r!) \cdot \poly(n)) $, where {$ (n = \vert {{\cal X}} \vert) $}. The novelty of this approach is its use of terminals, which are maximal elements of a natural partial order on {$ ({{\cal X}}) $}, and several insights from the softwired clusters literature. This yields a surprisingly simple and practical bounded-search algorithm and offers an alternative perspective on the underlying combinatorial structure of the hybridization number problem.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wohlers:2013:DOD, author = "Inken Wohlers and Rumen Andonov and Gunnar W.
Klau", title = "{DALIX}: Optimal {DALI} Protein Structure Alignment", journal = j-TCBB, volume = "10", number = "1", pages = "26--36", month = jan, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.143", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 10 07:28:56 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We present a mathematical model and exact algorithm for optimally aligning protein structures using the dali scoring model. This scoring model is based on comparing the interresidue distance matrices of proteins and is used in the popular dali software tool, a heuristic method for protein structure alignment. Our model and algorithm extend an integer linear programming approach that has been previously applied for the related, but simpler, contact map overlap problem. To this end, we introduce a novel type of constraint that handles negative score values and relax it in a Lagrangian fashion. The new algorithm, which we call dalix, is applicable to any distance matrix-based scoring scheme. We also review options that allow to consider fewer pairs of interresidue distances explicitly because their large number hinders the optimization process. Using four known data sets of varying structural similarity, we compute many provably score-optimal dali alignments. This allowed, for the first time, to evaluate the dali heuristic in sound mathematical terms. The results indicate that dali usually computes optimal or close to optimal alignments. However, we detect a subset of small proteins for which dali fails to generate any significant alignment, although such alignments do exist.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Higa:2013:GSG, author = "Carlos H. A. Higa and Tales P. 
Andrade and Ronaldo Fumio Hashimoto", title = "Growing Seed Genes from Time Series Data and Thresholded {Boolean} Networks with Perturbation", journal = j-TCBB, volume = "10", number = "1", pages = "37--49", month = jan, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.169", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 10 07:28:56 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Models of gene regulatory networks (GRN) have been proposed along with algorithms for inferring their structure. By structure, we mean the relationships among the genes of the biological system under study. Despite the large number of genes found in the genome of an organism, it is believed that a small set of genes is responsible for maintaining a specific core regulatory mechanism (small subnetworks). We propose an algorithm for inference of subnetworks of genes from a small initial set of genes called seed and time series gene expression data. The algorithm has two main steps: First, it grows the seed of genes by adding genes to it, and second, it searches for subnetworks that can be biologically meaningful. The seed growing step is treated as a feature selection problem and we used a thresholded Boolean network with a perturbation model to design the criterion function that is used to select the features (genes). Given that the reverse engineering of GRN is a problem that does not necessarily have one unique solution, the proposed algorithm has as output a set of networks instead of one single network. The algorithm also analyzes the dynamics of the networks which can be time-consuming. Nevertheless, the algorithm is suitable when the number of genes is small. 
The results showed that the algorithm is capable of recovering an acceptable rate of gene interactions and to generate regulatory hypotheses that can be explored in the wet lab.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Muraro:2013:IGN, author = "Daniele Muraro and Ute Vo{\ss} and Michael Wilson and Malcolm Bennett and Helen Byrne and Ive {De Smet} and Charlie Hodgman and John King", title = "Inference of the Genetic Network Regulating Lateral Root Initiation in \bioname{Arabidopsis thaliana}", journal = j-TCBB, volume = "10", number = "1", pages = "50--60", month = jan, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.3", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 10 07:28:56 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Regulation of gene expression is crucial for organism growth, and it is one of the challenges in systems biology to reconstruct the underlying regulatory biological networks from transcriptomic data. The formation of lateral roots in Arabidopsis thaliana is stimulated by a cascade of regulators of which only the interactions of its initial elements have been identified. Using simulated gene expression data with known network topology, we compare the performance of inference algorithms, based on different approaches, for which ready-to-use software is available. We show that their performance improves with the network size and the inclusion of mutants. We then analyze two sets of genes, whose activity is likely to be relevant to lateral root initiation in Arabidopsis, and assess causality of their regulatory interactions by integrating sequence analysis with the intersection of the results of the best performing methods on time series and mutants.
The methods applied capture known interactions between genes that are candidate regulators at early stages of development. The network inferred from genes significantly expressed during lateral root formation exhibits distinct scale free, small world and hierarchical properties and the nodes with a high out-degree may warrant further investigation.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Than:2013:MPD, author = "Cuong V. Than and Noah A. Rosenberg", title = "Mathematical Properties of the Deep Coalescence Cost", journal = j-TCBB, volume = "10", number = "1", pages = "61--72", month = jan, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.133", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 10 07:28:56 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In the minimizing-deep-coalescences (MDC) approach for species tree inference, a tree that has the minimal deep coalescence cost for reconciling a collection of gene trees is taken as an estimate of the species tree topology. The MDC method possesses the desirable Pareto property, and in practice it is quite accurate and computationally efficient. Here, in order to better understand the MDC method, we investigate some properties of the deep coalescence cost. We prove that the unit neighborhood of either a rooted species tree or a rooted gene tree under the deep coalescence cost is exactly the same as the tree's unit neighborhood under the rooted nearest-neighbor interchange (NNI) distance. Next, for a fixed species tree, we obtain the maximum deep coalescence cost across all gene trees as well as the number of gene trees that achieve the maximum cost. 
We also study corresponding problems for a fixed gene tree.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Boyen:2013:MMM, author = "Peter Boyen and Frank Neven and Dries {Van Dyck} and Felipe Valentim and Aalt van Dijk", title = "Mining Minimal Motif Pair Sets Maximally Covering Interactions in a Protein-Protein Interaction Network", journal = j-TCBB, volume = "10", number = "1", pages = "73--86", month = jan, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.165", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 10 07:28:56 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Correlated motif covering (CMC) is the problem of finding a set of motif pairs, i.e., pairs of patterns, in the sequences of proteins from a protein-protein interaction network (PPI-network) that describe the interactions in the network as concisely as possible. In other words, a perfect solution for CMC would be a minimal set of motif pairs that describes the interaction behavior perfectly in the sense that two proteins from the network interact if and only if their sequences match a motif pair in the minimal set. In this paper, we introduce and formally define CMC and show that it is closely related to the red-blue set cover (RBSC) problem and its weighted version (WRBSC)---both well-known NP-hard problems for which there exist several algorithms with known approximation factor guarantees. We prove the hardness of approximation of CMC by providing an approximation factor preserving reduction from RBSC to CMC. We show the existence of a theoretical approximation algorithm for CMC by providing an approximation factor preserving reduction from CMC to WRBSC.
We adapt the latter algorithm into a functional heuristic for CMC, called CMC-approx, and experimentally assess its performance and biological relevance. The implementation in Java can be found at {\tt http://bioinformatics.uhasselt.be}.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Rajapakse:2013:MGS, author = "Jagath C. Rajapakse and Piyushkumar A. Mundra", title = "Multiclass Gene Selection Using {Pareto}-Fronts", journal = j-TCBB, volume = "10", number = "1", pages = "87--97", month = jan, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.1", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 10 07:28:56 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Filter methods are often used for selection of genes in multiclass sample classification by using microarray data. Such techniques usually tend to bias toward a few classes that are easily distinguishable from other classes due to imbalances of strong features and sample sizes of different classes. It could therefore lead to selection of redundant genes while missing the relevant genes, leading to poor classification of tissue samples. In this manuscript, we propose to decompose multiclass ranking statistics into class-specific statistics and then use Pareto-front analysis for selection of genes. This alleviates the bias induced by class intrinsic characteristics of dominating classes. The use of Pareto-front analysis is demonstrated on two filter criteria commonly used for gene selection: F-score and KW-score. 
A significant improvement in classification performance and reduction in redundancy among top-ranked genes were achieved in experiments with both synthetic and real-benchmark data sets.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Yang:2013:PEL, author = "Ye Yang and Farnoosh A. Aghababazadeh and David R. Bickel", title = "Parametric Estimation of the Local False Discovery Rate for Identifying Genetic Associations", journal = j-TCBB, volume = "10", number = "1", pages = "98--108", month = jan, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.140", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 10 07:28:56 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Many genome-wide association studies have been conducted to identify single nucleotide polymorphisms (SNPs) that are associated with particular diseases or other traits. The local false discovery rate (LFDR) estimated using semiparametric models has enjoyed success in simultaneous inference. However, semiparametric LFDR estimators can be biased because they tend to overestimate the proportion of the nonassociated SNPs. We address the problem by adapting a simple parametric mixture model (PMM) and by comparing this model to the semiparametric mixture model (SMM) behind an LFDR estimator that is known to be conservatively biased. Then, we also compare the PMM with a parametric nonmixture model (PNM). In our simulation studies, we thoroughly analyze the performances of the three models under different values of $ (p_1) $, a prior probability that is approximately equal to the proportion of SNPs that are associated with the disease. When $ (p_1 > 10 \%) $, the PMM generally performs better than the SMM. When $ (p_1 < 0.1 \%) $, the SMM outperforms PMM. 
When $ (p_1) $ lies between 0.1 and 10 percent, both methods have about the same performance. In that setting, the PMM may be preferred since it has the advantage of supplying an estimate of the detectability level of the nonassociated SNPs.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Todor:2013:PBN, author = "Andrei Todor and Alin Dobra and Tamer Kahveci", title = "Probabilistic Biological Network Alignment", journal = j-TCBB, volume = "10", number = "1", pages = "109--121", month = jan, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.142", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 10 07:28:56 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Interactions between molecules are probabilistic events. An interaction may or may not happen with some probability, depending on a variety of factors such as the size, abundance, or proximity of the interacting molecules. In this paper, we consider the problem of aligning two biological networks. Unlike existing methods, we allow one of the two networks to contain probabilistic interactions. Allowing interaction probabilities makes the alignment more biologically relevant at the expense of explosive growth in the number of alternative topologies that may arise from different subsets of interactions that take place. We develop a novel method that efficiently and precisely characterizes this massive search space. We represent the topological similarity between pairs of aligned molecules (i.e., proteins) with the help of random variables and compute their expected values. We validate our method showing that, without sacrificing the running time performance, it can produce novel alignments. 
Our results also demonstrate that our method identifies biologically meaningful mappings under a comprehensive set of criteria used in the literature as well as the statistical coherence measure that we developed to analyze the statistical significance of the similarity of the functions of the aligned protein pairs.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Biller:2013:RBP, author = "Priscila Biller and Pedro Feij{\~a}o and Jo{\~a}o Meidanis", title = "Rearrangement-Based Phylogeny Using the Single-Cut-or-Join Operation", journal = j-TCBB, volume = "10", number = "1", pages = "122--134", month = jan, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.168", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 10 07:28:56 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Recently, the Single-Cut-or-Join (SCJ) operation was proposed as a basis for a new rearrangement distance between multichromosomal genomes, leading to very fast algorithms, both in theory and in practice. However, it was not clear how well this new distance fares when it comes to using it to solve relevant problems, such as the reconstruction of evolutionary history. In this paper, we advance current knowledge, by testing SCJ's ability regarding evolutionary reconstruction in two aspects: (1) How well does SCJ reconstruct evolutionary topologies? and (2) How well does SCJ reconstruct ancestral genomes? In the process of answering these questions, we implemented SCJ-based methods, and made them available to the community. We ran experiments using as many as 200 genomes, with as many as 3,000 genes.
For the first question, we found out that SCJ can recover typically between 60 percent and more than 95 percent of the topology, as measured through the Robinson--Foulds distance (a.k.a. split distance) between trees. In other words, 60 percent to more than 95 percent of the original splits are also present in the reconstructed tree. For the second question, given a topology, SCJ's ability to reconstruct ancestral genomes depends on how far from the leaves the ancestral is. For nodes close to the leaves, about 85 percent of the gene adjacencies can be recovered. This percentage decreases as we move up the tree, but, even at the root, about 50 percent of the adjacencies are recovered, for as many as 64 leaves. Our findings corroborate the fact that SCJ leads to very conservative genome reconstructions, yielding very few false-positive gene adjacencies in the ancestrals, at the expense of a relatively larger amount of false negatives. In addition, experiments with real data from the Campanulaceae and Protostomes groups show that SCJ reconstructs topologies of quality comparable to the accepted trees of the species involved. As far as time is concerned, the methods we implemented can find a topology for 64 genomes with 2,000 genes each in about 10.7 minutes, and reconstruct the ancestral genomes in a 64-leaf tree in about 3 seconds, both on a typical desktop computer. It should be noted that our code is written in Java and we made no significant effort to optimize it.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Axenopoulos:2013:SDP, author = "Apostolos Axenopoulos and Petros Daras and Georgios E. 
Papadopoulos and Elias Houstis", title = "{SP-Dock}: Protein-Protein Docking Using Shape and Physicochemical Complementarity", journal = j-TCBB, volume = "10", number = "1", pages = "135--150", month = jan, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.149", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 10 07:28:56 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In this paper, a framework for protein-protein docking is proposed, which exploits both shape and physicochemical complementarity to generate improved docking predictions. Shape complementarity is achieved by matching local surface patches. However, unlike existing approaches, which are based on single-patch or two-patch matching, we developed a new algorithm that compares simultaneously, groups of neighboring patches from the receptor with groups of neighboring patches from the ligand. Taking into account the fact that shape complementarity in protein surfaces is mostly approximate rather than exact, the proposed group-based matching algorithm fits perfectly to the nature of protein surfaces. This is demonstrated by the high performance that our method achieves especially in the case where the unbound structures of the proteins are considered. 
Additionally, several physicochemical factors, such as desolvation energy, electrostatic complementarity (EC), hydrophobicity (HP), Coulomb potential (CP), and Lennard-Jones potential are integrated using an optimized scoring function, improving geometric ranking in more than 60 percent of the complexes of Docking Benchmark 2.4.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Grunewald:2013:SCS, author = "Stefan Gr{\"u}newald and Andreas Spillner and Sarah Bastkowski and Anja B{\"o}gershausen and Vincent Moulton", title = "{SuperQ}: Computing Supernetworks from Quartets", journal = j-TCBB, volume = "10", number = "1", pages = "151--160", month = jan, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.8", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 10 07:28:56 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Supertrees are a commonly used tool in phylogenetics to summarize collections of partial phylogenetic trees. As a generalization of supertrees, phylogenetic supernetworks allow, in addition, the visual representation of conflict between the trees that is not possible to observe with a single tree. Here, we introduce SuperQ, a new method for constructing such supernetworks (SuperQ is freely available at http://www.uea.ac.uk/computing/superq). It works by first breaking the input trees into quartet trees, and then stitching these together to form a special kind of phylogenetic network, called a split network. This stitching process is performed using an adaptation of the QNet method for split network reconstruction employing a novel approach to use the branch lengths from the input trees to estimate the branch lengths in the resulting network.
Compared with previous supernetwork methods, SuperQ has the advantage of producing a planar network. We compare the performance of SuperQ to the Z-closure and Q-imputation supernetwork methods, and also present an analysis of some published data sets as an illustration of its applicability.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Irigoien:2013:DPI, author = "Itziar Irigoien and Francesc Mestres and Concepci{\'o}n Arenas", title = "The Depth Problem: Identifying the Most Representative Units in a Data Group", journal = j-TCBB, volume = "10", number = "1", pages = "161--172", month = jan, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.147", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 10 07:28:56 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This paper presents a solution to the problem of how to identify the units in groups or clusters that have the greatest degree of centrality and best characterize each group. This problem frequently arises in the classification of data such as types of tumor, gene expression profiles or general biomedical data. It is particularly important in the common context that many units do not properly belong to any cluster. Furthermore, in gene expression data classification, good identification of the most central units in a cluster enables recognition of the most important samples in a particular pathological process. We propose a new depth function that allows us to identify central units. As our approach is based on a measure of distance or dissimilarity between any pair of units, it can be applied to any kind of multivariate data (continuous, binary or multiattribute data).
Therefore, it is very valuable in many biomedical applications, which usually involve noncontinuous data, such as clinical, pathological, or biological data sources. We validate the approach using artificial examples and apply it to empirical data. The results show the good performance of our statistical approach.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wu:2013:ROC, author = "Xia Wu and Juan Li and Napatkamon Ayutyanont and Hillary Protas and William Jagust and Adam Fleisher and Eric Reiman and Li Yao and Kewei Chen", title = "The Receiver Operational Characteristic for Binary Classification with Multiple Indices and Its Application to the Neuroimaging Study of {Alzheimer}'s Disease", journal = j-TCBB, volume = "10", number = "1", pages = "173--180", month = jan, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.141", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 10 07:28:56 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Given a single index, the receiver operational characteristic (ROC) curve analysis is routinely utilized for characterizing performances in distinguishing two conditions/groups in terms of sensitivity and specificity. Given the availability of multiple data sources (referred to as multi-indices), such as multimodal neuroimaging data sets, cognitive tests, and clinical ratings and genomic data in Alzheimer's disease (AD) studies, the single-index-based ROC underutilizes all available information. For a long time, a number of algorithmic/analytic approaches combining multiple indices have been widely used to simultaneously incorporate multiple sources. 
In this study, we propose an alternative for combining multiple indices using logical operations, such as ``AND,'' ``OR,'' and ``at least $ (n) $'' (where $ (n) $ is an integer), to construct multivariate ROC (multiV-ROC) and characterize the sensitivity and specificity statistically associated with the use of multiple indices. With and without the ``leave-one-out'' cross-validation, we used two data sets from AD studies to showcase the potentially increased sensitivity/specificity of the multiV-ROC in comparison to the single-index ROC and linear discriminant analysis (an analytic way of combining multi-indices). We conclude that, for the data sets we investigated, the proposed multiV-ROC approach is capable of providing a natural and practical alternative with improved classification accuracy as compared to univariate ROC and linear discriminant analysis.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Shao:2013:UMB, author = "Gui-Fang Shao and Fan Yang and Qian Zhang and Qi-Feng Zhou and Lin-Kai Luo", title = "Using the Maximum Between-Class Variance for Automatic Gridding of {cDNA} Microarray Images", journal = j-TCBB, volume = "10", number = "1", pages = "181--192", month = jan, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.130", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 10 07:28:56 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Gridding is the first and most important step to separate the spots into distinct areas in microarray image analysis. Human intervention is necessary for most gridding methods, even if some so-called fully automatic approaches also need preset parameters. 
The applicability of these methods is limited in certain domains and will cause variations in the gene expression results. In addition, improper gridding, which is influenced by both the misalignment and high noise level, will affect the high throughput analysis. In this paper, we have presented a fully automatic gridding technique to break through the limitation of traditional mathematical morphology gridding methods. First, a preprocessing algorithm was applied for noise reduction. Subsequently, the optimal threshold was gained by using the improved Otsu method to actually locate each spot. In order to diminish the error, the original gridding result was optimized according to the heuristic techniques by estimating the distribution of the spots. Intensive experiments on six different data sets indicate that our method is superior to the traditional morphology one and is robust in the presence of noise. More importantly, the algorithm involved in our method is simple. Furthermore, human intervention and parameters presetting are unnecessary when the algorithm is applied in different types of microarray images.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Lamiable:2013:AGT, author = "Alexis Lamiable and Franck Quessette and Sandrine Vial and Dominique Barth and Alain Denise", title = "An Algorithmic Game-Theory Approach for Coarse-Grain Prediction of {RNA} {$3$D} Structure", journal = j-TCBB, volume = "10", number = "1", pages = "193--199", month = jan, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.148", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 10 07:28:56 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We present a new approach for the prediction of the coarse-grain 3D structure of RNA 
molecules. We model a molecule as being made of helices and junctions. Those junctions are classified into topological families that determine their preferred 3D shapes. All the parts of the molecule are then allowed to establish long-distance contacts that induce a 3D folding of the molecule. An algorithm relying on game theory is proposed to discover such long-distance contacts that allow the molecule to reach a Nash equilibrium. As reported by our experiments, this approach allows one to predict the global shape of large molecules of several hundreds of nucleotides that are out of reach of the state-of-the-art methods.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ovaska:2013:GRO, author = "Kristian Ovaska and Lauri Lyly and Biswajyoti Sahu and Olli A. J{\"a}nne and Sampsa Hautaniemi", title = "Genomic Region Operation Kit for Flexible Processing of Deep Sequencing Data", journal = j-TCBB, volume = "10", number = "1", pages = "200--206", month = jan, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.170", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 10 07:28:56 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Computational analysis of data produced in deep sequencing (DS) experiments is challenging due to large data volumes and requirements for flexible analysis approaches. Here, we present a mathematical formalism based on set algebra for frequently performed operations in DS data analysis to facilitate translation of biomedical research questions to language amenable for computational analysis. With the help of this formalism, we implemented the Genomic Region Operation Kit (GROK), which supports various DS-related operations such as preprocessing, filtering, file conversion, and sample comparison.
GROK provides high-level interfaces for R, Python, Lua, and command line, as well as an extension C++ API. It supports major genomic file formats and allows storing custom genomic regions in efficient data structures such as red-black trees and SQL databases. To demonstrate the utility of GROK, we have characterized the roles of two major transcription factors (TFs) in prostate cancer using data from 10 DS experiments. GROK is freely available with a user guide from {\tt http://csbi.ltdk.helsinki.fi/grok/}.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wan:2013:HFA, author = "Xiang Wan and Can Yang and Qiang Yang and Hongyu Zhao and Weichuan Yu", title = "{HapBoost}: a Fast Approach to Boosting Haplotype Association Analyses in Genome-Wide Association Studies", journal = j-TCBB, volume = "10", number = "1", pages = "207--212", month = jan, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.6", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 10 07:28:56 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Genome-wide association study (GWAS) has been successful in identifying genetic variants that are associated with complex human diseases. In GWAS, multilocus association analyses through linkage disequilibrium (LD), named haplotype-based analyses, may have greater power than single-locus analyses for detecting disease susceptibility loci. However, the large number of SNPs genotyped in GWAS poses great computational challenges in the detection of haplotype associations. We present a fast method named HapBoost for finding haplotype associations, which can be applied to quickly screen the whole genome. The effectiveness of HapBoost is demonstrated by using both synthetic and real data sets. 
The experimental results show that the proposed approach can achieve comparably accurate results while it performs much faster than existing methods.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Howison:2013:HTC, author = "Mark Howison", title = "High-Throughput Compression of {FASTQ} Data with {SeqDB}", journal = j-TCBB, volume = "10", number = "1", pages = "213--218", month = jan, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.160", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 10 07:28:56 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Compression has become a critical step in storing next-generation sequencing (NGS) data sets because of both the increasing size and decreasing costs of such data. Recent research into efficiently compressing sequence data has focused largely on improving compression ratios. Yet, the throughputs of current methods now lag far behind the I/O bandwidths of modern storage systems. As biologists move their analyses to high-performance systems with greater I/O bandwidth, low-throughput compression becomes a limiting factor. To address this gap, we present a new storage model called SeqDB, which offers high-throughput compression of sequence data with minimal sacrifice in compression ratio. 
It achieves this by combining the existing multithreaded Blosc compressor with a new data-parallel byte-packing scheme, called SeqPack, which interleaves sequence data and quality scores.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhu:2013:ISI, author = "Yuan Zhu and Xiao-Fei Zhang and Dao-Qing Dai and Meng-Yun Wu", title = "Identifying Spurious Interactions and Predicting Missing Interactions in the Protein-Protein Interaction Networks via a Generative Network Model", journal = j-TCBB, volume = "10", number = "1", pages = "219--225", month = jan, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.164", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 10 07:28:56 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "With the rapid development of high-throughput experiment techniques for protein-protein interaction (PPI) detection, a large amount of PPI network data are becoming available. However, the data produced by these techniques have high levels of spurious and missing interactions. This study assigns a new reliably indication for each protein pairs via the new generative network model (RIGNM) where the scale-free property of the PPI network is considered to reliably identify both spurious and missing interactions in the observed high-throughput PPI network. 
The experimental results show that the RIGNM is more effective and interpretable than the compared methods, which demonstrate that this approach has the potential to better describe the PPI networks and drive new discoveries.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Poleksic:2013:IAM, author = "Aleksandar Poleksic", title = "Improved Algorithms for Matching {$r$}-Separated Sets with Applications to Protein Structure Alignment", journal = j-TCBB, volume = "10", number = "1", pages = "226--229", month = jan, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.135", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 10 07:28:56 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The Largest Common Point-set (LCP) and the Pattern Matching (PM) problems have received much attention in the fields of pattern matching, computer vision and computational biology. Perhaps, the most important application of these problems is the protein structural alignment, which seeks to find a superposition of a pair of input proteins that maximizes a given protein structure similarity metric. Although it has been shown that LCP and PM are both tractable problems, the running times of existing algorithms are high-degree polynomials. Here, we present novel methods for finding approximate and exact threshold-LCP and threshold-PM for $r$-separated sets, in general, and protein 3D structures, in particular.
Improved running times of our methods are achieved by building upon several different, previously published techniques.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhou:2013:MAD, author = "Xiaowei Zhou and Can Yang and Xiang Wan and Hongyu Zhao and Weichuan Yu", title = "Multisample {aCGH} Data Analysis via Total Variation and Spectral Regularization", journal = j-TCBB, volume = "10", number = "1", pages = "230--235", month = jan, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.166", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 10 07:28:56 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "DNA copy number variation (CNV) accounts for a large proportion of genetic variation. One commonly used approach to detecting CNVs is array-based comparative genomic hybridization (aCGH). Although many methods have been proposed to analyze aCGH data, it is not clear how to combine information from multiple samples to improve CNV detection. In this paper, we propose to use a matrix to approximate the multisample aCGH data and minimize the total variation of each sample as well as the nuclear norm of the whole matrix. In this way, we can make use of the smoothness property of each sample and the correlation among multiple samples simultaneously in a convex optimization framework. We also developed an efficient and scalable algorithm to handle large-scale data. 
Experiments demonstrate that the proposed method outperforms the state-of-the-art techniques under a wide range of scenarios and it is capable of processing large data sets with millions of probes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Caceres:2013:WSN, author = "Alan Joseph J. Caceres and Juan Castillo and Jinnie Lee and Katherine {St. John}", title = "Walks on {SPR} Neighborhoods", journal = j-TCBB, volume = "10", number = "1", pages = "236--239", month = jan, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.136", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 10 07:28:56 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A nearest-neighbor-interchange (NNI)-walk is a sequence of unrooted phylogenetic trees, {$ (T_1, T_2, \ldots, T_k) $} where each consecutive pair of trees differs by a single NNI move. We give tight bounds on the length of the shortest NNI-walks that visit all trees in a subtree-prune-and-regraft (SPR) neighborhood of a given tree. For any unrooted, binary tree, {$ (T) $}, on $ (n) $ leaves, the shortest walk takes {$ (\Theta (n^2)) $} additional steps more than the number of trees in the SPR neighborhood. 
This answers Bryant's Second Combinatorial Challenge from the Phylogenetics Challenges List, the Isaac Newton Institute, 2011, and the Penny Ante Problem List, 2009.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Anonymous:2013:RL, author = "Anonymous", title = "2012 Reviewers List", journal = j-TCBB, volume = "10", number = "1", pages = "240--243", month = jan, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.51", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 10 07:28:56 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The publication offers a note of thanks and lists its reviewers.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Title:2013:AI, author = "Anonymous", title = "2012 Annual Index", journal = j-TCBB, volume = "10", number = "1", pages = "244--270", month = jan, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.42", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 10 07:28:56 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This index covers all technical items --- papers, correspondence, reviews, etc. --- that appeared in this periodical during 2012, and items from previous years that were commented upon or corrected in 2012. Departments and other items may also be covered if they have been judged to have archival value. The Author Index contains the primary entry for each item, listed under the first author's name. 
The primary entry includes the coauthors' names, the title of the paper or other item, and its location, specified by the publication abbreviation, year, month, and inclusive pagination. The Subject Index contains entries describing the item under all appropriate subject headings, plus the First author's name, the publication abbreviation, month, and year, and inclusive pages. Note that the item title is found only under the primary entry in the Author Index.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Chen:2013:GEA, author = "Yi-Ping Phoebe Chen", title = "Guest Editorial: Advanced Algorithms of Bioinformatics", journal = j-TCBB, volume = "10", number = "2", pages = "273--273", month = mar, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.93", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 16 07:55:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Lei:2013:CRC, author = "Jikai Lei and Prapaporn Techa-angkoon and Yanni Sun", title = "{Chain-RNA}: a Comparative {ncRNA} Search Tool Based on the Two-Dimensional Chain Algorithm", journal = j-TCBB, volume = "10", number = "2", pages = "274--285", month = mar, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.137", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 16 07:55:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Noncoding RNA (ncRNA) identification is highly important to modern biology. 
The state-of-the-art method for ncRNA identification is based on comparative genomics, in which evolutionary conservations of sequences and secondary structures provide important evidence for ncRNA search. For ncRNAs with low sequence conservation but high structural similarity, conventional local alignment tools such as BLAST yield low sensitivity. Thus, there is a need for ncRNA search methods that can incorporate both sequence and structural similarities. We introduce chain-RNA, a pairwise structural alignment tool that can effectively locate cross-species conserved RNA elements with low sequence similarity. In chain-RNA, stem-loop structures are extracted from dot plots generated by an efficient local-folding algorithm. Then, we formulate stem alignment as an extended 2D chain problem and employ existing chain algorithms. Chain-RNA is tested on a data set containing annotated ncRNA homologs and is applied to novel ncRNA search in a transcriptomic data set. The experimental results show that chain-RNA has better tradeoff between sensitivity and false positive rate in ncRNA prediction than conventional sequence similarity search tools and is more time efficient than structural alignment tools. 
The source codes of chain-RNA can be downloaded at http://sourceforge.net/projects/chain-rna/ or at http://www.cse.msu.edu/~leijikai/chain-rna/.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Maji:2013:RFC, author = "Pradipta Maji and Sushmita Paul", title = "Rough-Fuzzy Clustering for Grouping Functionally Similar Genes from Microarray Data", journal = j-TCBB, volume = "10", number = "2", pages = "286--299", month = mar, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.103", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 16 07:55:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Gene expression data clustering is one of the important tasks of functional genomics as it provides a powerful tool for studying functional relationships of genes in a biological process. Identifying coexpressed groups of genes represents the basic challenge in gene clustering problem. In this regard, a gene clustering algorithm, termed as robust rough-fuzzy $c$-means, is proposed judiciously integrating the merits of rough sets and fuzzy sets. While the concept of lower and upper approximations of rough sets deals with uncertainty, vagueness, and incompleteness in cluster definition, the integration of probabilistic and possibilistic memberships of fuzzy sets enables efficient handling of overlapping partitions in noisy environment. The concept of possibilistic lower bound and probabilistic boundary of a cluster, introduced in robust rough-fuzzy $c$-means, enables efficient selection of gene clusters. 
An efficient method is proposed to select initial prototypes of different gene clusters, which enables the proposed $c$-means algorithm to converge to an optimum or near optimum solutions and helps to discover coexpressed gene clusters. The effectiveness of the algorithm, along with a comparison with other algorithms, is demonstrated both qualitatively and quantitatively on 14 yeast microarray data sets.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Le:2013:CRT, author = "Ngoc Tu Le and Tu Bao Ho and Bich Hai Ho", title = "Computational Reconstruction of Transcriptional Relationships from {ChIP}-Chip Data", journal = j-TCBB, volume = "10", number = "2", pages = "300--307", month = mar, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.102", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 16 07:55:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Eukaryotic gene transcription is a complex process, which requires the orchestrated recruitment of a large number of proteins, such as sequence-specific DNA binding factors, chromatin remodelers and modifiers, and general transcription machinery, to regulatory regions. Previous works have shown that these regulatory proteins favor specific organizational theme along promoters. Details about how they cooperatively regulate transcriptional process, however, remain unclear. We developed a method to reconstruct a Bayesian network (BN) model representing functional relationships among various transcriptional components. The positive/negative influence between these components was measured from protein binding and nucleosome occupancy data and embedded into the model. 
Application on \bioname{S. cerevisiae} ChIP-Chip data showed that the proposed method can recover confirmed relationships, such as Isw1-Pol II, TFIIH-Pol II, TFIIB-TBP, Pol II-H3K36Me3, H3K4Me3-H3K14Ac, etc. Moreover, it can distinguish colocating components from functionally related ones. Novel relationships, e.g., ones between Mediator and chromatin remodeling complexes (CRCs), and the combinatorial regulation of Pol II recruitment and activity by CRCs and general transcription factors (GTFs), were also suggested. Conclusion: protein binding events during transcription positively influence each other. Among contributing components, GTFs and CRCs play pivotal roles in transcriptional regulation. These findings provide insights into the regulatory mechanism.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Fages:2013:GEI, author = "Fran{\c{c}}ois Fages and Sylvain Soliman", title = "{Guest Editors}' Introduction to the Special Section on Computational Methods in Systems Biology", journal = j-TCBB, volume = "10", number = "2", pages = "308--309", month = mar, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.94", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 16 07:55:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Henzinger:2013:PAC, author = "Thomas A. 
Henzinger and Maria Mateescu", title = "The Propagation Approach for Computing Biochemical Reaction Networks", journal = j-TCBB, volume = "10", number = "2", pages = "310--322", month = mar, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.91", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 16 07:55:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We introduce propagation models (PMs), a formalism able to express several kinds of equations that describe the behavior of biochemical reaction networks. Furthermore, we introduce the propagation abstract data type (PADT), which separates concerns regarding different numerical algorithms for the transient analysis of biochemical reaction networks from concerns regarding their implementation, thus allowing for portable and efficient solutions. The state of a propagation abstract data type is given by a vector that assigns mass values to a set of nodes, and its $ ({\bf next}) $ operator propagates mass values through this set of nodes. We propose an approximate implementation of the $ ({\bf next}) $ operator, based on threshold abstraction, which propagates only ``significant'' mass values and thus achieves a compromise between efficiency and accuracy. Finally, we give three use cases for propagation models: the chemical master equation (CME), the reaction rate equation (RRE), and a hybrid method that combines these two equations. These three applications use propagation models in order to propagate probabilities and/or expected values and variances of the model's variables.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Murthy:2013:CAC, author = "Abhishek Murthy and Ezio Bartocci and Flavio H. Fenton and James Glimm and Richard A. Gray and Elizabeth M. 
Cherry and Scott A. Smolka and Radu Grosu", title = "Curvature Analysis of Cardiac Excitation Wavefronts", journal = j-TCBB, volume = "10", number = "2", pages = "323--336", month = mar, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.125", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 16 07:55:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We present the Spiral Classification Algorithm (SCA), a fast and accurate algorithm for classifying electrical spiral waves and their associated breakup in cardiac tissues. The classification performed by SCA is an essential component of the detection and analysis of various cardiac arrhythmic disorders, including ventricular tachycardia and fibrillation. Given a digitized frame of a propagating wave, SCA constructs a highly accurate representation of the front and the back of the wave, piecewise interpolates this representation with cubic splines, and subjects the result to an accurate curvature analysis. This analysis is more comprehensive than methods based on spiral-tip tracking, as it considers the entire wave front and back. To increase the smoothness of the resulting symbolic representation, the SCA uses weighted overlapping of adjacent segments which increases the smoothness at join points. SCA has been applied to a number of representative types of spiral waves, and, for each type, a distinct curvature evolution in time (signature) has been identified. Distinct signatures have also been identified for spiral breakup. 
These results represent a significant first step in automatically determining parameter ranges for which a computational cardiac-cell network accurately reproduces a particular kind of cardiac arrhythmia, such as ventricular fibrillation.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Gao:2013:MMA, author = "Qian Gao and David Gilbert and Monika Heiner and Fei Liu and Daniele Maccagnola and David Tree", title = "Multiscale Modeling and Analysis of Planar Cell Polarity in the \bioname{Drosophila} Wing", journal = j-TCBB, volume = "10", number = "2", pages = "337--351", month = mar, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.101", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 16 07:55:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Modeling across multiple scales is a current challenge in Systems Biology, especially when applied to multicellular organisms. In this paper, we present an approach to model at different spatial scales, using the new concept of Hierarchically Colored Petri Nets (HCPN). We apply HCPN to model a tissue comprising multiple cells hexagonally packed in a honeycomb formation in order to describe the phenomenon of Planar Cell Polarity (PCP) signaling in \bioname{Drosophila} wing. We have constructed a family of related models, permitting different hypotheses to be explored regarding the mechanisms underlying PCP. In addition our models include the effect of well-studied genetic mutations. We have applied a set of analytical techniques including clustering and model checking over time series of primary and secondary data. 
Our models support the interpretation of biological observations reported in the literature.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Bozdag:2013:GTA, author = "Serdar Bozdag and Timothy J. Close and Stefano Lonardi", title = "A Graph-Theoretical Approach to the Selection of the Minimum Tiling Path from a Physical Map", journal = j-TCBB, volume = "10", number = "2", pages = "352--360", month = mar, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.26", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 16 07:55:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The problem of computing the minimum tiling path (MTP) from a set of clones arranged in a physical map is a cornerstone of hierarchical (clone-by-clone) genome sequencing projects. We formulate this problem in a graph theoretical framework, and then solve by a combination of minimum hitting set and minimum spanning tree algorithms. The tool implementing this strategy, called FMTP, shows improved performance compared to the widely used software FPC. When we execute FMTP and FPC on the same physical map, the MTP produced by FMTP covers a higher portion of the genome, and uses a smaller number of clones. For instance, on the rice genome the MTP produced by our tool would reduce by about 11 percent the cost of a clone-by-clone sequencing project. 
Source code, benchmark data sets, and documentation of FMTP are freely available at {\tt http://code.google.com/p/fingerprint-based-minimal-tiling-path/} under MIT license.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Yang:2013:EBC, author = "Cheng-Hong Yang and Yu-Da Lin and Li-Yeh Chaung and Hsueh-Wei Chang", title = "Evaluation of Breast Cancer Susceptibility Using Improved Genetic Algorithms to Generate Genotype {SNP} Barcodes", journal = j-TCBB, volume = "10", number = "2", pages = "361--371", month = mar, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.27", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 16 07:55:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Genetic association is a challenging task for the identification and characterization of genes that increase the susceptibility to common complex multifactorial diseases. To fully execute genetic studies of complex diseases, modern geneticists face the challenge of detecting interactions between loci. A genetic algorithm (GA) is developed to detect the association of genotype frequencies of cancer cases and noncancer cases based on statistical analysis. An improved genetic algorithm (IGA) is proposed to improve the reliability of the GA method for high-dimensional SNP-SNP interactions. The strategy offers the top five results to the random population process, in which they guide the GA toward a significant search course. The IGA increases the likelihood of quickly detecting the maximum ratio difference between cancer cases and noncancer cases. 
The study systematically evaluates the joint effect of 23 SNP combinations of six steroid hormone metabolisms, and signaling-related genes involved in breast carcinogenesis pathways were systematically evaluated, with IGA successfully detecting significant ratio differences between breast cancer cases and noncancer cases. The possible breast cancer risks were subsequently analyzed by odds-ratio (OR) and risk-ratio analysis. The estimated OR of the best SNP barcode is significantly higher than 1 (between 1.15 and 7.01) for specific combinations of two to 13 SNPs. Analysis results support that the IGA provides higher ratio difference values than the GA between breast cancer cases and noncancer cases over 3-SNP to 13-SNP interactions. A more specific SNP-SNP interaction profile for the risk of breast cancer is also provided.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Yang:2013:FNF, author = "Jiaoyun Yang and Yun Xu and Xiaohui Yao and Guoliang Chen", title = "{FNphasing}: a Novel Fast Heuristic Algorithm for Haplotype Phasing Based on Flow Network Model", journal = j-TCBB, volume = "10", number = "2", pages = "372--382", month = mar, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.18", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 16 07:55:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "An enormous amount of sequence data has been generated with the development of new DNA sequencing technologies, which presents great challenges for computational biology problems such as haplotype phasing. Although arduous efforts have been made to address this problem, the current methods still cannot efficiently deal with the incoming flood of large-scale data. 
In this paper, we propose a flow network model to tackle haplotype phasing problem, and explain some classical haplotype phasing rules based on this model. By incorporating the heuristic knowledge obtained from these classical rules, we design an algorithm FNphasing based on the flow network model. Theoretically, the time complexity of our algorithm is {$ O(n^2 m + m^2) $}, which is better than that of 2SNP, one of the most efficient algorithms currently. After testing the performance of FNphasing with several simulated data sets, the experimental results show that when applied on large-scale data sets, our algorithm is significantly faster than the state-of-the-art Beagle algorithm. FNphasing also achieves an equal or superior accuracy compared with other approaches.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Lazar:2013:GNA, author = "Cosmin Lazar and Jonatan Taminau and Stijn Meganck and David Steenhoff and Alain Coletta and David Y. Weiss Solis and Colin Molter and Robin Duque and Hugues Bersini and Ann Nowe", title = "{GENESHIFT}: a Nonparametric Approach for Integrating Microarray Gene Expression Data Based on the Inner Product as a Distance Measure between the Distributions of Genes", journal = j-TCBB, volume = "10", number = "2", pages = "383--392", month = mar, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.12", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 16 07:55:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The potential of microarray gene expression (MAGE) data is only partially explored due to the limited number of samples in individual studies. 
This limitation can be surmounted by merging or integrating data sets originating from independent MAGE experiments, which are designed to study the same biological problem. However, this process is hindered by batch effects that are study-dependent and result in random data distortion; therefore numerical transformations are needed to render the integration of different data sets accurate and meaningful. Our contribution in this paper is two-fold. First we propose GENESHIFT, a new nonparametric batch effect removal method based on two key elements from statistics: empirical density estimation and the inner product as a distance measure between two probability density functions; second we introduce a new validation index of batch effect removal methods based on the observation that samples from two independent studies drawn from a same population should exhibit similar probability density functions. We evaluated and compared the GENESHIFT method with four other state-of-the-art methods for batch effect removal: Batch-mean centering, empirical Bayes or COMBAT, distance-weighted discrimination, and cross-platform normalization. Several validation indices providing complementary information about the efficiency of batch effect removal methods have been employed in our validation framework. The results show that none of the methods clearly outperforms the others. More than that, most of the methods used for comparison perform very well with respect to some validation indices while performing very poor with respect to others. 
GENESHIFT exhibits robust performances and its average rank is the highest among the average ranks of all methods used for comparison.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Bernardini:2013:GRN, author = "Camilla Bernardini and Federica Censi and Wanda Lattanzi and Giovanni Calcagnini and Alessandro Giuliani", title = "Gene Regulation Networks in Early Phase of {Duchenne} Muscular Dystrophy", journal = j-TCBB, volume = "10", number = "2", pages = "393--400", month = mar, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.24", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 16 07:55:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The aim of this study was to analyze previously published gene expression data of skeletal muscle biopsies of Duchenne muscular dystrophy (DMD) patients and controls (gene expression omnibus database, accession \#GSE6011) using systems biology approaches. We applied an unsupervised method to discriminate patient and control populations, based on principal component analysis, using the gene expressions as units and patients as variables. The genes having the highest absolute scores in the discrimination between the groups, were then analyzed in terms of gene expression networks, on the basis of their mutual correlation in the two groups. The correlation network structures suggest two different modes of gene regulation in the two groups, reminiscent of important aspects of DMD pathogenesis.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Baya:2013:HMC, author = "Ariel E. Baya and Pablo M. 
Granitto", title = "How Many Clusters: a Validation Index for Arbitrary-Shaped Clusters", journal = j-TCBB, volume = "10", number = "2", pages = "401--414", month = mar, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.32", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 16 07:55:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Clustering validation indexes are intended to assess the goodness of clustering results. Many methods used to estimate the number of clusters rely on a validation index as a key element to find the correct answer. This paper presents a new validation index based on graph concepts, which has been designed to find arbitrary shaped clusters by exploiting the spatial layout of the patterns and their clustering label. This new clustering index is combined with a solid statistical detection framework, the gap statistic. The resulting method is able to find the right number of arbitrary-shaped clusters in diverse situations, as we show with examples where this information is available. A comparison with several relevant validation methods is carried out using artificial and gene expression data sets. The results are very encouraging, showing that the underlying structure in the data can be more accurately detected with the new clustering index. 
Our gene expression data results also indicate that this new index is stable under perturbation of the input data.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Chen:2013:MFP, author = "Qingfeng Chen and Wei Lan and Jianxin Wang", title = "Mining Featured Patterns of {MiRNA} Interaction Based on Sequence and Structure Similarity", journal = j-TCBB, volume = "10", number = "2", pages = "415--422", month = mar, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.5", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 16 07:55:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "MicroRNA (miRNA) is an endogenous small noncoding RNA that plays an important role in gene expression through the post-transcriptional gene regulation pathways. There are many literature works focusing on predicting miRNA targets and exploring gene regulatory networks of miRNA families. We suggest, however, the study to identify the interaction between miRNAs is insufficient. This paper presents a framework to identify relationships between miRNAs using joint entropy, to investigate the regulatory features of miRNAs. Both the sequence and secondary structure are taken into consideration to make our method more relevant from the biological viewpoint. Further, joint entropy is applied to identify correlated miRNAs, which are more desirable from the perspective of the gene regulatory network. A data set including \bioname{Drosophila melanogaster} and \bioname{Anopheles gambiae} is used in the experiment. 
The results demonstrate that our approach is able to identify known miRNA interaction and uncover novel patterns of miRNA regulatory network.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Maulik:2013:MQB, author = "Ujjwal Maulik and Anirban Mukhopadhyay and Malay Bhattacharyya and Lars Kaderali and Benedikt Brors and Sanghamitra Bandyopadhyay and Roland Eils", title = "Mining Quasi-Bicliques from {HIV-1}-Human Protein Interaction Network: a Multiobjective Biclustering Approach", journal = j-TCBB, volume = "10", number = "2", pages = "423--435", month = mar, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.139", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 16 07:55:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In this work, we model the problem of mining quasi-bicliques from weighted viral-host protein-protein interaction network as a biclustering problem for identifying strong interaction modules. In this regard, a multiobjective genetic algorithm-based biclustering technique is proposed that simultaneously optimizes three objective functions to obtain dense biclusters having high mean interaction strengths. The performance of the proposed technique has been compared with that of other existing biclustering methods on an artificial data. Subsequently, the proposed biclustering method is applied on the records of biologically validated and predicted interactions between a set of HIV-1 proteins and a set of human proteins to identify strong interaction modules. For this, the entire interaction information is realized as a bipartite graph. We have further investigated the biological significance of the obtained biclusters. 
The human proteins involved in the strong interaction module have been found to share common biological properties and they are identified as the gateways of viral infection leading to various diseases. These human proteins can be potential drug targets for developing anti-HIV drugs.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wang:2013:MLR, author = "Xiao Wang and Guo-Zheng Li", title = "Multilabel Learning via Random Label Selection for Protein Subcellular Multilocations Prediction", journal = j-TCBB, volume = "10", number = "2", pages = "436--446", month = mar, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.21", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 16 07:55:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Prediction of protein subcellular localization is an important but challenging problem, particularly when proteins may simultaneously exist at, or move between, two or more different subcellular location sites. Most of the existing protein subcellular localization methods are only used to deal with the single-location proteins. In the past few years, only a few methods have been proposed to tackle proteins with multiple locations. However, they only adopt a simple strategy, that is, transforming the multilocation proteins to multiple proteins with single location, which does not take correlations among different subcellular locations into account. In this paper, a novel method named random label selection (RALS) (multilabel learning via RALS), which extends the simple binary relevance (BR) method, is proposed to learn from multilocation proteins in an effective and efficient way. 
RALS does not explicitly find the correlations among labels, but rather implicitly attempts to learn the label correlations from data by augmenting original feature space with randomly selected labels as its additional input features. Through the fivefold cross-validation test on a benchmark data set, we demonstrate our proposed method with consideration of label correlations obviously outperforms the baseline BR method without consideration of label correlations, indicating correlations among different subcellular locations really exist and contribute to improvement of prediction performance. Experimental results on two benchmark data sets also show that our proposed methods achieve significantly higher performance than some other state-of-the-art methods in predicting subcellular multilocations of proteins. The prediction web server is available at {\tt http://levis.tongji.edu.cn:8080/bioinfo/MLPred-Euk/} for the public usage.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Li:2013:NLS, author = "Yifeng Li and Alioune Ngom", title = "Nonnegative Least-Squares Methods for the Classification of High-Dimensional Biological Data", journal = j-TCBB, volume = "10", number = "2", pages = "447--456", month = mar, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.30", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 16 07:55:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Microarray data can be used to detect diseases and predict responses to therapies through classification models. However, the high dimensionality and low sample size of such data result in many computational problems such as reduced prediction accuracy and slow classification speed. 
In this paper, we propose a novel family of nonnegative least-squares classifiers for high-dimensional microarray gene expression and comparative genomic hybridization data. Our approaches are based on combining the advantages of using local learning, transductive learning, and ensemble learning, for better prediction performance. To study the performances of our methods, we performed computational experiments on 17 well-known data sets with diverse characteristics. We have also performed statistical comparisons with many classification techniques including the well-performing SVM approach and two related but recent methods proposed in literature. Experimental results show that our approaches are faster and achieve generally a better prediction performance over compared methods.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Yu:2013:NFV, author = "Hong-Jie Yu and De-Shuang Huang", title = "Normalized Feature Vectors: a Novel Alignment-Free Sequence Comparison Method Based on the Numbers of Adjacent Amino Acids", journal = j-TCBB, volume = "10", number = "2", pages = "457--467", month = mar, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.10", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 16 07:55:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Based on all kinds of adjacent amino acids (AAA), we map each protein primary sequence into a 400 by ({$ (L - 1) $}) matrix {$ ({\schmi M}) $}. In addition, we further derive a normalized 400-tuple mathematical descriptors {$ ({\schmi D}) $}, which is extracted from the primary protein sequences via singular values decomposition (SVD) of the matrix. 
The obtained 400-D normalized feature vectors (NFVs) further facilitate our quantitative analysis of protein sequences. Using the normalized representation of the primary protein sequences, we analyze the similarity for different sequences upon two data sets: (1) ND5 sequences from nine species and (2) transferrin sequences of 24 vertebrates. We also compared the results in this study with those from other related works. These two experiments illustrate that our proposed NFV-AAA approach does perform well in the field of similarity analysis of sequence.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Tu:2013:INR, author = "Chien-Ta Tu and Bor-Sen Chen", title = "On the Increase in Network Robustness and Decrease in Network Response Ability during the Aging Process: a Systems Biology Approach via Microarray Data", journal = j-TCBB, volume = "10", number = "2", pages = "468--480", month = mar, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.23", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 16 07:55:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Aging, an extremely complex and system-level process, has attracted much attention in medical research, especially since chronic diseases are quite prevalent in the elderly population. These may be the result of both gene mutations that lead to intrinsic perturbations and environmental changes that may stimulate signaling in the body. Therefore, analysis of network robustness to tolerate intrinsic perturbations and network response ability of gene networks to respond to external stimuli during the aging process may provide insight into the systematic changes caused by aging. 
We first propose novel methods to estimate network robustness and measure network response ability of gene regulatory networks by using their corresponding microarray data in the aging process. Then, we find that an aging-related gene network is more robust to intrinsic perturbations in the elderly than the young, and therefore is less responsive to external stimuli. Finally, we find that the response abilities of individual genes, especially FOXOs, NF-$ \kappa $B, and p53, are significantly different in the young versus the aged subjects. These observations are consistent with experimental findings in the aged population, e.g., elevated incidence of tumorigenesis and diminished resistance to oxidative stress. The proposed method can also be used for exploring and analyzing the dynamic properties of other biological processes via corresponding microarray data to provide useful information on clinical strategy and drug target selection.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{DeRonne:2013:POP, author = "Kevin W. DeRonne and George Karypis", title = "{Pareto} Optimal Pairwise Sequence Alignment", journal = j-TCBB, volume = "10", number = "2", pages = "481--493", month = mar, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.2", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 16 07:55:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Sequence alignment using evolutionary profiles is a commonly employed tool when investigating a protein. Many profile-profile scoring functions have been developed for use in such alignments, but there has not yet been a comprehensive study of Pareto optimal pairwise alignments for combining multiple such functions.
We show that the problem of generating Pareto optimal pairwise alignments has an optimal substructure property, and develop an efficient algorithm for generating Pareto optimal frontiers of pairwise alignments. All possible sets of two, three, and four profile scoring functions are used from a pool of 11 functions and applied to 588 pairs of proteins in the ce\_ref data set. The performance of the best objective combinations on ce\_ref is also evaluated on an independent set of 913 protein pairs extracted from the BAliBASE RV11 data set. Our dynamic-programming-based heuristic approach produces approximated Pareto optimal frontiers of pairwise alignments that contain comparable alignments to those on the exact frontier, but on average in less than 1/58th the time in the case of four objectives. Our results show that the Pareto frontiers contain alignments whose quality is better than the alignments obtained by single objectives. However, the task of identifying a single high-quality alignment among those in the Pareto frontier remains challenging.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Tsai:2013:PBL, author = "Tsung-Heng Tsai and Mahlet G. Tadesse and Yue Wang and Habtom W. Ressom", title = "Profile-Based {LC-MS} Data Alignment --- a {Bayesian} Approach", journal = j-TCBB, volume = "10", number = "2", pages = "494--503", month = mar, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.25", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 16 07:55:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A Bayesian alignment model (BAM) is proposed for alignment of liquid chromatography-mass spectrometry (LC-MS) data. 
BAM belongs to the category of profile-based approaches, which are composed of two major components: a prototype function and a set of mapping functions. Appropriate estimation of these functions is crucial for good alignment results. BAM uses Markov chain Monte Carlo (MCMC) methods to draw inference on the model parameters and improves on existing MCMC-based alignment methods through (1) the implementation of an efficient MCMC sampler and (2) an adaptive selection of knots. A block Metropolis--Hastings algorithm that mitigates the problem of the MCMC sampler getting stuck at local modes of the posterior distribution is used for the update of the mapping function coefficients. In addition, a stochastic search variable selection (SSVS) methodology is used to determine the number and positions of knots. We applied BAM to a simulated data set, an LC-MS proteomic data set, and two LC-MS metabolomic data sets, and compared its performance with the Bayesian hierarchical curve registration (BHCR) model, the dynamic time-warping (DTW) model, and the continuous profile model (CPM). 
The advantage of applying appropriate profile-based retention time correction prior to performing a feature-based approach is also demonstrated through the metabolomic data sets.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Rud:2013:RFL, author = "Ali Gholami Rud and Saeed Shahrivari and Saeed Jalili and Zahra Razaghi Moghadam Kashani", title = "{RANGI}: a Fast List-Colored Graph Motif Finding Algorithm", journal = j-TCBB, volume = "10", number = "2", pages = "504--513", month = mar, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.167", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 16 07:55:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Given a multiset of colors as the query and a list-colored graph, i.e., an undirected graph with a set of colors assigned to each of its vertices, in the NP-hard list-colored graph motif problem the goal is to find the largest connected subgraph such that one can select a color from the set of colors assigned to each of its vertices to obtain a subset of the query. This problem was introduced to find functional motifs in biological networks. We present a branch-and-bound algorithm named RANGI for finding and enumerating list-colored graph motifs. As our experimental results show, RANGI's pruning methods and heuristics make it quite fast in practice compared to the algorithms presented in the literature. 
We also present a parallel version of RANGI that achieves acceptable scalability.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Liu:2013:RSN, author = "Wei Liu and Dong Li and Yunping Zhu and Hongwei Xie and Fuchu He", title = "Reconstruction of Signaling Network from Protein Interactions Based on Function Annotations", journal = j-TCBB, volume = "10", number = "2", pages = "514--521", month = mar, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.20", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 16 07:55:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The directionality of protein interactions is the prerequisite of forming various signaling networks, and the construction of signaling networks is a critical issue in the discovering the mechanism of the life process. In this paper, we proposed a novel method to infer the directionality in protein-protein interaction networks and furthermore construct signaling networks. Based on the functional annotations of proteins, we proposed a novel parameter GODS and established the prediction model. This method shows high sensitivity and specificity to predict the directionality of protein interactions, evaluated by fivefold cross validation. By taking the threshold value of GODS as 2, we achieved accuracy 95.56 percent and coverage 74.69 percent in the human test set. Also, this method was successfully applied to reconstruct the classical signaling pathways in human. 
This study not only provided an effective method to unravel the unknown signaling pathways, but also the deeper understanding for the signaling networks, from the aspect of protein function.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Gorecki:2013:UTR, author = "Pawel Gorecki and Oliver Eulenstein and Jerzy Tiuryn", title = "Unrooted Tree Reconciliation: a Unified Approach", journal = j-TCBB, volume = "10", number = "2", pages = "522--536", month = mar, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.22", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 16 07:55:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Tree comparison functions are widely used in phylogenetics for comparing evolutionary trees. Unrooted trees can be compared with rooted trees by identifying all rootings of the unrooted tree that minimize some provided comparison function between two rooted trees. The plateau property is satisfied by the provided function, if all optimal rootings form a subtree, or plateau, in the unrooted tree, from which the rootings along every path toward a leaf have monotonically increasing costs. This property is sufficient for the linear-time identification of all optimal rootings and rooting costs. However, the plateau property has only been proven for a few rooted comparison functions, requiring individual proofs for each function without benefitting from inherent structural features of such functions. Here, we introduce the consistency condition that is sufficient for a general function to satisfy the plateau property. For consistent functions, we introduce general linear-time solutions that identify optimal rootings and all rooting costs. 
Further, we identify novel relationships between consistent functions in terms of plateaus, especially the plateau of the well-studied duplication-loss function is part of a plateau of every other consistent function. We introduce a novel approach for identifying consistent cost functions by defining a formal language of Boolean costs. Formulas in this language can be interpreted as cost functions. Finally, we demonstrate the performance of our general linear-time solutions in practice using empirical and simulation studies.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Meskin:2013:PEB, author = "N. Meskin and H. Nounou and M. Nounou and A. Datta", title = "Parameter Estimation of Biological Phenomena: an Unscented {Kalman} Filter Approach", journal = j-TCBB, volume = "10", number = "2", pages = "537--543", month = mar, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.19", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 16 07:55:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Recent advances in high-throughput technologies for biological data acquisition have spurred a broad interest in the construction of mathematical models for biological phenomena. The development of such mathematical models relies on the estimation of unknown parameters of the system using the time-course profiles of different metabolites in the system. One of the main challenges in the parameter estimation of biological phenomena is the fact that the number of unknown parameters is much more than the number of metabolites in the system. Moreover, the available metabolite measurements are corrupted by noise. 
In this paper, a new parameter estimation algorithm is developed based on the stochastic estimation framework for nonlinear systems, namely the unscented Kalman filter (UKF). A new iterative UKF algorithm with covariance resetting is developed in which the UKF algorithm is applied iteratively to the available noisy time profiles of the metabolites. The proposed estimation algorithm is applied to noisy time-course data synthetically produced from a generic branched pathway as well as real time-course profile for the Cad system of E. coli. The simulation results demonstrate the effectiveness of the proposed scheme.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Anonymous:2013:CPSa, author = "Anonymous", title = "Call for Papers: Special issue on software and databases in {TCBB}", journal = j-TCBB, volume = "10", number = "2", pages = "544--544", month = mar, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.88", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 16 07:55:23 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Rathore:2013:RSC, author = "Saima Rathore and Mutawarra Hussain and Ahmad Ali and Asifullah Khan", title = "A Recent Survey on Colon Cancer Detection Techniques", journal = j-TCBB, volume = "10", number = "3", pages = "545--563", month = may, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.84", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Nov 27 16:23:40 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract
= "Colon cancer causes deaths of about half a million people every year. Common method of its detection is histopathological tissue analysis, which, though leads to vital diagnosis, is significantly correlated to the tiredness, experience, and workload of the pathologist. Researchers have been working since decades to get rid of manual inspection, and to develop trustworthy systems for detecting colon cancer. Several techniques, based on spectral/spatial analysis of colon biopsy images, and serum and gene analysis of colon samples, have been proposed in this regard. Due to rapid evolution of colon cancer detection techniques, a latest review of recent research in this field is highly desirable. The aim of this paper is to discuss various colon cancer detection techniques. In this survey, we categorize the techniques on the basis of the adopted methodology and underlying data set, and provide detailed description of techniques in each category. Additionally, this study provides an extensive comparison of various colon cancer detection categories, and of multiple techniques within each category. Further, most of the techniques have been evaluated on similar data set to provide a fair performance comparison. 
Analysis reveals that neither of the techniques is perfect; however, research community is progressively inching toward the finest possible solution.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Dehzangi:2013:CFE, author = "Abdollah Dehzangi and Kuldip Paliwal and Alok Sharma and Omid Dehzangi and Abdul Sattar", title = "A Combination of Feature Extraction Methods with an Ensemble of Different Classifiers for Protein Structural Class Prediction Problem", journal = j-TCBB, volume = "10", number = "3", pages = "564--575", month = may, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.65", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Nov 27 16:23:40 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Better understanding of structural class of a given protein reveals important information about its overall folding type and its domain. It can also be directly used to provide critical information on general tertiary structure of a protein which has a profound impact on protein function determination and drug design. Despite tremendous enhancements made by pattern recognition-based approaches to solve this problem, it still remains as an unsolved issue for bioinformatics that demands more attention and exploration. In this study, we propose a novel feature extraction model that incorporates physicochemical and evolutionary-based information simultaneously. We also propose overlapped segmented distribution and autocorrelation-based feature extraction methods to provide more local and global discriminatory information. The proposed feature extraction methods are explored for 15 most promising attributes that are selected from a wide range of physicochemical-based attributes. 
Finally, by applying an ensemble of different classifiers namely, Adaboost.M1, LogitBoost, naive Bayes, multilayer perceptron (MLP), and support vector machine (SVM) we show enhancement of the protein structural class prediction accuracy for four popular benchmarks.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Bordewich:2013:AGP, author = "Magnus Bordewich and Radu Mihaescu", title = "Accuracy Guarantees for Phylogeny Reconstruction Algorithms Based on Balanced Minimum Evolution", journal = j-TCBB, volume = "10", number = "3", pages = "576--583", month = may, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.39", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Nov 27 16:23:40 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Distance-based phylogenetic methods attempt to reconstruct an accurate phylogenetic tree from an estimated matrix of pairwise distances between taxa. This paper examines two distance-based algorithms (GreedyBME and FastME) that are based on the principle of minimizing the balanced minimum evolution score of the output tree in relation to the given estimated distance matrix. This is also the principle that underlies the neighbor-joining (NJ) algorithm. We show that GreedyBME and FastME both reconstruct the entire correct tree if the input data are quartet consistent, and also that if the maximum error of any distance estimate is $ (\epsilon) $, then both algorithms output trees containing all sufficiently long edges of the true tree: those having length at least $ (3 \epsilon) $. That is to say, the algorithms have edge safety radius 1/3. 
In contrast, quartet consistency of the data is not sufficient to guarantee the NJ algorithm reconstructs the correct tree, and moreover, the NJ algorithm has edge safety radius of 1/4: Only edges of the true tree of length at least $ (4 \epsilon) $ can be guaranteed to appear in the output. These results give further theoretical support to the experimental evidence suggesting FastME is a more suitable distance-based phylogeny reconstruction method than the NJ algorithm.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zou:2013:BNM, author = "Yi Ming Zou", title = "{Boolean} Networks with Multiexpressions and Parameters", journal = j-TCBB, volume = "10", number = "3", pages = "584--592", month = may, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.79", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Nov 27 16:23:40 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "To model biological systems using networks, it is desirable to allow more than two levels of expression for the nodes and to allow the introduction of parameters. Various modeling and simulation methods addressing these needs using Boolean models, both synchronous and asynchronous, have been proposed in the literature. However, analytical study of these more general Boolean networks models is lagging. This paper aims to develop a concise theory for these different Boolean logic-based modeling methods. Boolean models for networks where each node can have more than two levels of expression and Boolean models with parameters are defined algebraically with examples provided. Certain classes of random asynchronous Boolean networks and deterministic moduli asynchronous Boolean networks are investigated in detail using the setting introduced in this paper. 
The derived theorems provide a clear picture for the attractor structures of these asynchronous Boolean networks.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Liu:2013:CFS, author = "Hsi-Che Liu and Pei-Chen Peng and Tzung-Chien Hsieh and Ting-Chi Yeh and Chih-Jen Lin and Chien-Yu Chen and Jen-Yin Hou and Lee-Yung Shih and Der-Cherng Liang", title = "Comparison of Feature Selection Methods for Cross-Laboratory Microarray Analysis", journal = j-TCBB, volume = "10", number = "3", pages = "593--604", month = may, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.70", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Nov 27 16:23:40 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The amount of gene expression data of microarray has grown exponentially. To apply them for extensive studies, integrated analysis of cross-laboratory (cross-lab) data becomes a trend, and thus, choosing an appropriate feature selection method is an essential issue. This paper focuses on feature selection for Affymetrix (Affy) microarray studies across different labs. We investigate four feature selection methods: $ (t) $-test, significance analysis of microarrays (SAM), rank products (RP), and random forest (RF). The four methods are applied to acute lymphoblastic leukemia, acute myeloid leukemia, breast cancer, and lung cancer Affy data which consist of three cross-lab data sets each. We utilize a rank-based normalization method to reduce the bias from cross-lab data sets. Training on one data set or two combined data sets to test the remaining data set(s) are both considered. Balanced accuracy is used for prediction evaluation. 
This study provides comprehensive comparisons of the four feature selection methods in cross-lab microarray analysis. Results show that SAM has the best classification performance. RF also gets high classification accuracy, but it is not as stable as SAM. The most naive method is $ (t) $-test, but its performance is the worst among the four methods. In this study, we further discuss the influence from the number of training samples, the number of selected genes, and the issue of unbalanced data sets.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Jamil:2013:DIC, author = "Hasan M. Jamil", title = "Designing Integrated Computational Biology Pipelines Visually", journal = j-TCBB, volume = "10", number = "3", pages = "605--618", month = may, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.69", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Nov 27 16:23:40 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The long-term cost of developing and maintaining a computational pipeline that depends upon data integration and sophisticated workflow logic is too high to even contemplate ``what if'' or ad hoc type queries. In this paper, we introduce a novel application building interface for computational biology research, called VizBuilder, by leveraging a recent query language called BioFlow for life sciences databases. Using VizBuilder, it is now possible to develop ad hoc complex computational biology applications at throw away costs. The underlying query language supports data integration and workflow construction almost transparently and fully automatically, using a best effort approach. Users express their application by drawing it with VizBuilder icons and connecting them in a meaningful way. 
Completed applications are compiled and translated as BioFlow queries for execution by the data management system LifeDB, for which VizBuilder serves as a front end. We discuss VizBuilder features and functionalities in the context of a real life application after we briefly introduce BioFlow. The architecture and design principles of VizBuilder are also discussed. Finally, we outline future extensions of VizBuilder. To our knowledge, VizBuilder is a unique system that allows visually designing computational biology pipelines involving distributed and heterogeneous resources in an ad hoc manner.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Abate:2013:GLB, author = "Francesco Abate and Andrea Acquaviva and Elisa Ficarra and Roberto Piva and Enrico Macii", title = "{Gelsius}: a Literature-Based Workflow for Determining Quantitative Associations between Genes and Biological Processes", journal = j-TCBB, volume = "10", number = "3", pages = "619--631", month = may, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.11", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Nov 27 16:23:40 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "An effective knowledge extraction and quantification methodology from biomedical literature would allow the researcher to organize and analyze the results of high-throughput experiments on microarrays and next-generation sequencing technologies. Despite the large amount of raw information available on the web, a tool able to extract a measure of the correlation between a list of genes and biological processes is not yet available. 
In this paper, we present Gelsius, a workflow that incorporates biomedical literature to quantify the correlation between genes and terms describing biological processes. To achieve this target, we build different modules focusing on query expansion and document canonicalization. In this way, we reached to improve the measurement of correlation, performed using a latent semantic analysis approach. To the best of our knowledge, this is the first complete tool able to extract a measure of genes-biological processes correlation from literature. We demonstrate the effectiveness of the proposed workflow on six biological processes and a set of genes, by showing that correlation results for known relationships are in accordance with definitions of gene functions provided by NCI Thesaurus. On the other side, the tool is able to propose new candidate relationships for later experimental validation. The tool is available at {\tt http://bioeda1.polito.it:8080/medSearchServlet/}.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Haque:2013:GQB, author = "Md. Muksitul Haque and Lawrence B. Holder and Michael K. Skinner and Diane J. Cook", title = "Generalized Query-Based Active Learning to Identify Differentially Methylated Regions in {DNA}", journal = j-TCBB, volume = "10", number = "3", pages = "632--644", month = may, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.38", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Nov 27 16:23:40 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Active learning is a supervised learning technique that reduces the number of examples required for building a successful classifier, because it can choose the data it learns from.
This technique holds promise for many biological domains in which classified examples are expensive and time-consuming to obtain. Most traditional active learning methods ask very specific queries to the Oracle (e.g., a human expert) to label an unlabeled example. The example may consist of numerous features, many of which are irrelevant. Removing such features will create a shorter query with only relevant features, and it will be easier for the Oracle to answer. We propose a generalized query-based active learning (GQAL) approach that constructs generalized queries based on multiple instances. By constructing appropriately generalized queries, we can achieve higher accuracy compared to traditional active learning methods. We apply our active learning method to find differentially DNA methylated regions (DMRs). DMRs are DNA locations in the genome that are known to be involved in tissue differentiation, epigenetic regulation, and disease. We also apply our method on 13 other data sets and show that our method is better than another popular active learning technique.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Gremme:2013:GCS, author = "Gordon Gremme and Sascha Steinbiss and Stefan Kurtz", title = "{GenomeTools}: a Comprehensive Software Library for Efficient Processing of Structured Genome Annotations", journal = j-TCBB, volume = "10", number = "3", pages = "645--656", month = may, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.68", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Nov 27 16:23:40 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Genome annotations are often published as plain text files describing genomic features and their subcomponents by an implicit annotation graph. 
In this paper, we present the GenomeTools, a convenient and efficient software library and associated software tools for developing bioinformatics software intended to create, process or convert annotation graphs. The GenomeTools strictly follow the annotation graph approach, offering a unified graph-based representation. This gives the developer intuitive and immediate access to genomic features and tools for their manipulation. To process large annotation sets with low memory overhead, we have designed and implemented an efficient pull-based approach for sequential processing of annotations. This allows to handle even the largest annotation sets, such as a complete catalogue of human variations. Our object-oriented C-based software library enables a developer to conveniently implement their own functionality on annotation graphs and to integrate it into larger workflows, simultaneously accessing compressed sequence data if required. The careful C implementation of the GenomeTools does not only ensure a light-weight memory footprint while allowing full sequential as well as random access to the annotation graph, but also facilitates the creation of bindings to a variety of script programming languages (like Python and Ruby) sharing the same interface.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Yu:2013:HFC, author = "Zhiwen Yu and Hantao Chen and Jane You and Guoqiang Han and Le Li", title = "Hybrid Fuzzy Cluster Ensemble Framework for Tumor Clustering from Biomolecular Data", journal = j-TCBB, volume = "10", number = "3", pages = "657--670", month = may, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.59", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Nov 27 16:23:40 MST 2013", bibsource = "http://portal.acm.org/; 
https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Cancer class discovery using biomolecular data is one of the most important tasks for cancer diagnosis and treatment. Tumor clustering from gene expression data provides a new way to perform cancer class discovery. Most of the existing research works adopt single-clustering algorithms to perform tumor clustering from biomolecular data that lack robustness, stability, and accuracy. To further improve the performance of tumor clustering from biomolecular data, we introduce the fuzzy theory into the cluster ensemble framework for tumor clustering from biomolecular data, and propose four kinds of hybrid fuzzy cluster ensemble frameworks (HFCEF), named as HFCEF-I, HFCEF-II, HFCEF-III, and HFCEF-IV, respectively, to identify samples that belong to different types of cancers. The difference between HFCEF-I and HFCEF-II is that they adopt different ensemble generator approaches to generate a set of fuzzy matrices in the ensemble. Specifically, HFCEF-I applies the affinity propagation algorithm (AP) to perform clustering on the sample dimension and generates a set of fuzzy matrices in the ensemble based on the fuzzy membership function and base samples selected by AP. HFCEF-II adopts AP to perform clustering on the attribute dimension, generates a set of subspaces, and obtains a set of fuzzy matrices in the ensemble by performing fuzzy c-means on subspaces. Compared with HFCEF-I and HFCEF-II, HFCEF-III and HFCEF-IV consider the characteristics of HFCEF-I and HFCEF-II. HFCEF-III combines HFCEF-I and HFCEF-II in a serial way, while HFCEF-IV integrates HFCEF-I and HFCEF-II in a concurrent way. HFCEFs adopt suitable consensus functions, such as the fuzzy c-means algorithm or the normalized cut algorithm (Ncut), to summarize generated fuzzy matrices, and obtain the final results.
The experiments on real data sets from UCI machine learning repository and cancer gene expression profiles illustrate that (1) the proposed hybrid fuzzy cluster ensemble frameworks work well on real data sets, especially biomolecular data, and (2) the proposed approaches are able to provide more robust, stable, and accurate results when compared with the state-of-the-art single clustering algorithms and traditional cluster ensemble approaches.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{ElBakry:2013:IGR, author = "Ola ElBakry and M. Omair Ahmad and M. N. S. Swamy", title = "Inference of Gene Regulatory Networks with Variable Time Delay from Time-Series Microarray Data", journal = j-TCBB, volume = "10", number = "3", pages = "671--687", month = may, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.73", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Nov 27 16:23:40 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Regulatory interactions among genes and gene products are dynamic processes and hence modeling these processes is of great interest. Since genes work in a cascade of networks, reconstruction of gene regulatory network (GRN) is a crucial process for a thorough understanding of the underlying biological interactions. We present here an approach based on pairwise correlations and lasso to infer the GRN, taking into account the variable time delays between various genes. The proposed method is applied to both synthetic and real data sets, and the results on synthetic data show that the proposed approach outperforms the current methods. 
Further, the results using real data are more consistent with the existing knowledge concerning the possible gene interactions.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Li:2013:ISS, author = "Xiong Li and Bo Liao and Lijun Cai and Zhi Cao and Wen Zhu", title = "Informative {SNPs} Selection Based on Two-Locus and Multilocus Linkage Disequilibrium: Criteria of Max-Correlation and Min-Redundancy", journal = j-TCBB, volume = "10", number = "3", pages = "688--695", month = may, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.61", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Nov 27 16:23:40 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Currently, there are lots of methods to select informative SNPs for haplotype reconstruction. However, there are still some challenges that render them ineffective for large data sets. First, some traditional methods belong to wrappers which are of high computational complexity. Second, some methods ignore linkage disequilibrium that it is hard to interpret selection results. In this study, we innovatively derive optimization criteria by combining two-locus and multilocus LD measure to obtain the criteria of Max-Correlation and Min-Redundancy (MCMR). Then, we use a greedy algorithm to select the candidate set of informative SNPs constrained by the criteria. Finally, we use backward scheme to refine the candidate subset. We separately use small and middle ($ > 1{,}000 $ SNPs) data sets to evaluate MCMR in terms of the reconstruction accuracy, the time complexity, and the compactness.
Additionally, to demonstrate that MCMR is practical for large data sets, we design a parameter $ (w) $ to adapt to various platforms and introduce another replacement scheme for larger data sets, which sharply narrow down the computational complexity of evaluating the reconstruct ratio. Then, we first apply our method based on haplotype reconstruction for large size ($ > 5{,}000 $ SNPs) data sets. The results confirm that MCMR leads to promising improvement in informative SNPs selection and prediction accuracy.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Chan:2013:MAP, author = "Tak-Ming Chan and Leung-Yau Lo and Ho-Yin Sze-To and Kwong-Sak Leung and Xinshu Xiao and Man-Hon Wong", title = "Modeling Associated Protein-{DNA} Pattern Discovery with Unified Scores", journal = j-TCBB, volume = "10", number = "3", pages = "696--707", month = may, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.60", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Nov 27 16:23:40 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Understanding protein-DNA interactions, specifically transcription factor (TF) and transcription factor binding site (TFBS) bindings, is crucial in deciphering gene regulation. The recent associated TF-TFBS pattern discovery combines one-sided motif discovery on both the TF and the TFBS sides. Using sequences only, it identifies the short protein-DNA binding cores available only in high-resolution 3D structures. The discovered patterns lead to promising subtype and disease analysis applications. While the related studies use either association rule mining or existing TFBS annotations, none has proposed any formal unified (both-sided) model to prioritize the top verifiable associated patterns.
We propose the unified scores and develop an effective pipeline for associated TF-TFBS pattern discovery. Our stringent instance-level evaluations show that the patterns with the top unified scores match with the binding cores in 3D structures considerably better than the previous works, where up to 90 percent of the top 20 scored patterns are verified. We also introduce extended verification from literature surveys, where the high unified scores correspond to even higher verification percentage. The top scored patterns are confirmed to match the known WRKY binding cores with no available 3D structures and agree well with the top binding affinities of in vivo experiments.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Tang:2013:MIC, author = "Yang Tang and Huijun Gao and J{\"u}rgen Kurths", title = "Multiobjective Identification of Controlling Areas in Neuronal Networks", journal = j-TCBB, volume = "10", number = "3", pages = "708--720", month = may, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.72", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Nov 27 16:23:40 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In this paper, we investigate the multiobjective identification of controlling areas in the neuronal network of a cat's brain by considering two measures of controllability simultaneously. By utilizing nondominated sorting mechanisms and composite differential evolution (CoDE), a reference-point-based nondominated sorting composite differential evolution (RP-NSCDE) is developed to tackle the multiobjective identification of controlling areas in the neuronal network.
The proposed RP-NSCDE shows its promising performance in terms of accuracy and convergence speed, in comparison to nondominated sorting genetic algorithms II. The proposed method is also compared with other representative statistical methods in the complex network theory, single objective, and constraint optimization methods to illustrate its effectiveness and reliability. It is shown that there exists a tradeoff between minimizing two objectives, and therefore Pareto fronts (PFs) can be plotted. The developed approaches and findings can also be applied to coordination control of various kinds of real-world complex networks including biological networks and social networks, and so on.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Humphries:2013:NT, author = "Peter J. Humphries and Taoyang Wu", title = "On the Neighborhoods of Trees", journal = j-TCBB, volume = "10", number = "3", pages = "721--728", month = may, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.66", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Nov 27 16:23:40 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Tree rearrangement operations typically induce a metric on the space of phylogenetic trees. One important property of these metrics is the size of the neighborhood, that is, the number of trees exactly one operation from a given tree. We present an exact expression for the size of the TBR (tree bisection and reconnection) neighborhood, thus answering a question first posed by Allen and Steel. 
In addition, we also obtain a characterization of the extremal trees whose TBR neighborhoods are maximized and minimized.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhang:2013:PCP, author = "Yijia Zhang and Hongfei Lin and Zhihao Yang and Jian Wang and Yanpeng Li and Bo Xu", title = "Protein Complex Prediction in Large Ontology Attributed Protein-Protein Interaction Networks", journal = j-TCBB, volume = "10", number = "3", pages = "729--741", month = may, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.86", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Nov 27 16:23:40 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Protein complexes are important for unraveling the secrets of cellular organization and function. Many computational approaches have been developed to predict protein complexes in protein-protein interaction (PPI) networks. However, most existing approaches focus mainly on the topological structure of PPI networks, and largely ignore the gene ontology (GO) annotation information. In this paper, we constructed ontology attributed PPI networks with PPI data and GO resource. After constructing ontology attributed networks, we proposed a novel approach called CSO (clustering based on network structure and ontology attribute similarity). Structural information and GO attribute information are complementary in ontology attributed networks. CSO can effectively take advantage of the correlation between frequent GO annotation sets and the dense subgraph for protein complex prediction. Our proposed CSO approach was applied to four different yeast PPI data sets and predicted many well-known protein complexes. 
The experimental results showed that CSO was valuable in predicting protein complexes and achieved state-of-the-art performance.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Sengupta:2013:RKO, author = "Debarka Sengupta and Aroonalok Pyne and Ujjwal Maulik and Sanghamitra Bandyopadhyay", title = "Reformulated {Kemeny} Optimal Aggregation with Application in Consensus Ranking of {microRNA} Targets", journal = j-TCBB, volume = "10", number = "3", pages = "742--751", month = may, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.74", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Nov 27 16:23:40 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "MicroRNAs are very recently discovered small noncoding RNAs, responsible for negative regulation of gene expression. Members of this endogenous family of small RNA molecules have been found implicated in many genetic disorders. Each microRNA targets tens to hundreds of genes. Experimental validation of target genes is a time- and cost-intensive procedure. Therefore, prediction of microRNA targets is a very important problem in computational biology. Though, dozens of target prediction algorithms have been reported in the past decade, they disagree significantly in terms of target gene ranking (based on predicted scores). Rank aggregation is often used to combine multiple target orderings suggested by different algorithms. This technique has been used in diverse fields including social choice theory, meta search in web, and most recently, in bioinformatics. Kemeny optimal aggregation (KOA) is considered the more profound objective for rank aggregation. 
The consensus ordering obtained through Kemeny optimal aggregation incurs minimum pairwise disagreement with the input orderings. Because of its computational intractability, heuristics are often formulated to obtain a near optimal consensus ranking. Unlike its real time use in meta search, there are a number of scenarios in bioinformatics (e.g., combining microRNA target rankings, combining disease-related gene rankings obtained from microarray experiments) where evolutionary approaches can be afforded with the ambition of better optimization. We conjecture that an ideal consensus ordering should have its total disagreement shared, as equally as possible, with the input orderings. This is also important to refrain the evolutionary processes from getting stuck to local extremes. In the current work, we reformulate Kemeny optimal aggregation while introducing a trade-off between the total pairwise disagreement and its distribution. A simulated annealing-based implementation of the proposed objective has been found effective in context of microRNA target ranking. 
Supplementary data and source code link are available at: {\tt http://www.isical.ac.in/bioinfo\_miu/ieee\_tcbb\_kemeny.rar}.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wu:2013:SBP, author = "Jian-Sheng Wu and Zhi-Hua Zhou", title = "Sequence-Based Prediction of {microRNA}-Binding Residues in Proteins Using Cost-Sensitive {Laplacian} Support Vector Machines", journal = j-TCBB, volume = "10", number = "3", pages = "752--759", month = may, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.75", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Nov 27 16:23:40 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The recognition of microRNA (miRNA)-binding residues in proteins is helpful to understand how miRNAs silence their target genes. It is difficult to use existing computational method to predict miRNA-binding residues in proteins due to the lack of training examples. To address this issue, unlabeled data may be exploited to help construct a computational model. Semisupervised learning deals with methods for exploiting unlabeled data in addition to labeled data automatically to improve learning performance, where no human intervention is assumed. In addition, miRNA-binding proteins almost always contain a much smaller number of binding than nonbinding residues, and cost-sensitive learning has been deemed as a good solution to the class imbalance problem. In this work, a novel model is proposed for recognizing miRNA-binding residues in proteins from sequences using a cost-sensitive extension of Laplacian support vector machines (CS-LapSVM) with a hybrid feature.
The hybrid feature consists of evolutionary information of the amino acid sequence (position-specific scoring matrices), the conservation information about three biochemical properties (HKM) and mutual interaction propensities in protein-miRNA complex structures. The CS-LapSVM receives good performance with an F1 score of $ (26.23 \pm 2.55 \%) $ and an AUC value of $ (0.805 \pm 0.020) $ superior to existing approaches for the recognition of RNA-binding residues. A web server called SARS is built and freely available for academic usage.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Haack:2013:SRL, author = "Fiete Haack and Kevin Burrage and Ronald Redmer and Adelinde M. Uhrmacher", title = "Studying the Role of Lipid Rafts on Protein Receptor Bindings with Cellular Automata", journal = j-TCBB, volume = "10", number = "3", pages = "760--770", month = may, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.40", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Nov 27 16:23:40 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "It is widely accepted that lipid rafts promote receptor clustering and thereby facilitate signaling transduction. The role of lipid rafts in inducing and promoting receptor accumulation within the cell membrane has been explored by several computational and experimental studies. However, it remains unclear whether lipid rafts influence the recruitment and binding of proteins from the cytosol as well. To provide an answer to this question a spatial membrane model has been developed based on cellular automata. Our results indicate that lipid rafts indeed influence protein receptor bindings. 
In particular processes with slow dissociation and binding kinetics are promoted by lipid rafts, whereas fast binding processes are slightly hampered. However, the impact depends on a variety of parameters, such as the size and mobility of the lipid rafts, the induced slow down of receptors within rafts, and also the dissociation and binding kinetics of the cytosolic proteins. Thus, for any individual signaling pathway the influence of lipid rafts on protein binding might be different. To facilitate analyzing this influence given a specific pathway, our approach has been generalized into LiRaM, a modeling and simulation tool for lipid rafts models.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhang:2013:SAM, author = "Wei Zhang and Xiufen Zou", title = "Systematic Analysis of the Mechanisms of Virus-Triggered {Type I IFN} Signaling Pathways through Mathematical Modeling", journal = j-TCBB, volume = "10", number = "3", pages = "771--779", month = may, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.31", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Nov 27 16:23:40 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Based on biological experimental data, we developed a mathematical model of the virus-triggered signaling pathways that lead to induction of type I IFNs and systematically analyzed the mechanisms of the cellular antiviral innate immune responses, including the negative feedback regulation of ISG56 and the positive feedback regulation of IFNs. 
We found that the time between 5 and 48 hours after viral infection is vital for the control and/or elimination of the virus from the host cells and demonstrated that the ISG56-induced inhibition of MITA activation is stronger than the ISG56-induced inhibition of TBK1 activation. The global parameter sensitivity analysis suggests that the positive feedback regulation of IFNs is very important in the innate antiviral system. Furthermore, the robustness of the innate immune signaling network was demonstrated using a new robustness index. These results can help us understand the mechanisms of the virus-induced innate immune response at a system level and provide instruction for further biological experiments.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Hu:2013:UBT, author = "Allen L. Hu and Keith C. C. Chan", title = "Utilizing Both Topological and Attribute Information for Protein Complex Identification in {PPI} Networks", journal = j-TCBB, volume = "10", number = "3", pages = "780--792", month = may, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.37", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Nov 27 16:23:40 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Many computational approaches developed to identify protein complexes in protein-protein interaction (PPI) networks perform their tasks based only on network topologies. The attributes of the proteins in the networks are usually ignored. As protein attributes within a complex may also be related to each other, we have developed a PCIA algorithm to take into consideration both such information and network topology in the identification process of protein complexes. 
Given a PPI network, PCIA first finds information about the attributes of the proteins in a PPI network in the Gene Ontology databases and uses such information for the identification of protein complexes. PCIA then computes a Degree of Association measure for each pair of interacting proteins to quantitatively determine how much their attribute values associate with each other. Based on this association measure, PCIA is able to discover dense graph clusters consisting of proteins whose attribute values are significantly closer associated with each other. PCIA has been tested with real data and experimental results seem to indicate that attributes of the proteins in the same complex do have some association with each other and, therefore, that protein complexes can be more accurately identified when protein attributes are taken into consideration.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Garcia:2013:GDA, author = "Sara P. Garcia and Joao M. O. S. Rodrigues and Sergio Santos and Diogo Pratas and Vera Afreixo and Carlos Bastos and Paulo J. S. G. Ferreira and Armando J. Pinho", title = "A Genomic Distance for Assembly Comparison Based on Compressed Maximal Exact Matches", journal = j-TCBB, volume = "10", number = "3", pages = "793--798", month = may, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.77", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Nov 27 16:23:40 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Genome assemblies are typically compared with respect to their contiguity, coverage, and accuracy. We propose a genome-wide, alignment-free genomic distance based on compressed maximal exact matches and suggest adding it to the benchmark of commonly used assembly quality metrics. 
Maximal exact matches are perfect repeats, without gaps or misspellings, which cannot be further extended to either their left- or right-end side without loss of similarity. The genomic distance here proposed is based on the normalized compression distance, an information-theoretic measure of the relative compressibility of two sequences estimated using multiple finite-context models. This measure exposes similarities between the sequences, as well as, the nesting structure underlying the assembly of larger maximal exact matches from smaller ones. We use four human genome assemblies for illustration and discuss the impact of genome sequencing and assembly in the final content of maximal exact matches and the genomic distance here proposed.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Goni:2013:NAI, author = "Osman Goni", title = "A New Approach to Implement Absorbing Boundary Condition in Biomolecular Electrostatics", journal = j-TCBB, volume = "10", number = "3", pages = "799--804", month = may, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.96", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Nov 27 16:23:40 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This paper discusses a novel approach to employ the absorbing boundary condition in conjunction with the finite-element method (FEM) in biomolecular electrostatics. The introduction of Bayliss-Turkel absorbing boundary operators in electromagnetic scattering problem has been incorporated by few researchers. However, in the area of biomolecular electrostatics, this boundary condition has not been investigated yet. The objective of this paper is twofold. 
First, to solve nonlinear Poisson--Boltzmann equation using Newton's method and second, to find an efficient and acceptable solution with minimum number of unknowns. In this work, a Galerkin finite-element formulation is used along with a Bayliss-Turkel absorbing boundary operator that explicitly accounts for the open field problem by mapping the Sommerfeld radiation condition from the far field to near field. While the Bayliss-Turkel condition works well when the artificial boundary is far from the scatterer, an acceptable tolerance of error can be achieved with the second order operator. Numerical results on test case with simple sphere show that the treatment is able to reach the same level of accuracy achieved by the analytical method while using a lower grid density. Bayliss-Turkel absorbing boundary condition (BTABC) combined with the FEM converges to the exact solution of scattering problems to within discretization error.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Milotti:2013:CAB, author = "Edoardo Milotti and Vladislav Vyshemirsky and Michela Sega and Sabrina Stella and Federico Dogo and Roberto Chignola", title = "Computer-Aided Biophysical Modeling: a Quantitative Approach to Complex Biological Systems", journal = j-TCBB, volume = "10", number = "3", pages = "805--810", month = may, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.35", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Nov 27 16:23:40 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "When dealing with the biophysics of tumors, analytical and numerical modeling tools have long been regarded as potentially useful but practically immature tools. 
Further developments could not just overturn this predicament, but lead to completely new perspectives in biology. Here, we give an account of our own computational tool and how we have put it to good use, and we discuss a paradigmatic example to outline a path to making cell biology more quantitative and predictive.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ivanov:2013:QPP, author = "Stefan Ivanov and Ivan Dimitrov and Irina Doytchinova", title = "Quantitative Prediction of Peptide Binding to {HLA-DP1} Protein", journal = j-TCBB, volume = "10", number = "3", pages = "811--815", month = may, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.78", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Nov 27 16:23:40 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The exogenous proteins are processed by the host antigen-processing cells. Peptidic fragments of them are presented on the cell surface bound to the major histocompatibility complex (MHC) molecules class II and recognized by the CD4+ T lymphocytes. The MHC binding is considered as the crucial prerequisite for T-cell recognition. Only peptides able to form stable complexes with the MHC proteins are recognized by the T-cells. These peptides are known as T-cell epitopes. All T-cell epitopes are MHC binders, but not all MHC binders are T-cell epitopes. The T-cell epitope prediction is one of the main priorities of immunoinformatics. In the present study, three chemometric techniques are combined to derive a model for in silico prediction of peptide binding to the human MHC class II protein HLA-DP1. The structures of a set of known peptide binders are described by amino acid z-descriptors.
Data are processed by an iterative self-consisted algorithm using the method of partial least squares, and a quantitative matrix (QM) for peptide binding prediction to HLA-DP1 is derived. The QM is validated by two sets of proteins and showed an average accuracy of 86 percent.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Anonymous:2013:CPSb, author = "Anonymous", title = "Call for Papers: Special Issue on Software and Databases in {TCBB}", journal = j-TCBB, volume = "10", number = "3", pages = "816--816", month = may, year = "2013", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Nov 27 16:23:40 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Yu:2013:PFP, author = "Guoxian Yu and Huzefa Rangwala and Carlotta Domeniconi and Guoji Zhang and Zhiwen Yu", title = "Protein Function Prediction using Multi-label Ensemble Classification", journal = j-TCBB, volume = "10", number = "4", pages = "1045--1057", month = jul, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.111", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jan 9 15:33:59 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "High-throughput experimental techniques produce several heterogeneous proteomic and genomic datasets. To computationally annotate proteins, it is necessary and promising to integrate these heterogeneous data sources. Some methods transform these data sources into different kernels or feature representations.
Next, these kernels are linearly (or non-linearly) combined into a composite kernel. The composite kernel is utilized to develop a predictive model to infer the function of proteins. A protein can have multiple roles and functions (or labels). Therefore, multi-label learning methods are also adapted for protein function prediction. We develop a transductive multi-label classifier (TMC) to predict multiple functions of proteins using several unlabeled proteins. We also propose a method called transductive multi-label ensemble classifier (TMEC) for integrating the different data sources using an ensemble approach. TMEC trains a graph-based multi-label classifier on each single data source and then combines the predictions of the individual classifiers. We use a directed bi-relational graph to capture three types of relationships between pairs of proteins, between pairs of functions, and between proteins and functions. We evaluate the effectiveness of TMC and TMEC to predict the functions of proteins on three benchmarks. We show that our approaches perform better than recently proposed protein function prediction methods on composite and multiple kernels.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{deSouto:2013:GES, author = "Marcilio C. P.
de Souto and Maricel Kann", title = "Guest Editorial for Special Section on {BSB 2012}", journal = j-TCBB, volume = "10", number = "4", pages = "817--818", month = jul, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.173", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jan 9 15:33:59 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Feijao:2013:EAF, author = "Pedro Feijao and Joao Meidanis", title = "Extending the Algebraic Formalism for Genome Rearrangements to Include Linear Chromosomes", journal = j-TCBB, volume = "10", number = "4", pages = "819--831", month = jul, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.161", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jan 9 15:33:59 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Algebraic rearrangement theory, as introduced by Meidanis and Dias, focuses on representing the order in which genes appear in chromosomes, and applies to circular chromosomes only. By shifting our attention to genome adjacencies, we introduce the adjacency algebraic theory, extending the original algebraic theory to linear chromosomes in a very natural way, also allowing the original algebraic distance formula to be used to the general multichromosomal case, with both linear and circular chromosomes. The resulting distance, which we call algebraic distance here, is very similar to, but not quite the same as, double-cut-and-join distance. We present linear time algorithms to compute it and to sort genomes. 
We show how to compute the rearrangement distance from the adjacency graph, for an easier comparison with other rearrangement distances. A thorough discussion on the relationship between the chromosomal and adjacency representation is also given, and we show how all classic rearrangement operations can be modeled using the algebraic theory.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Lorenz:2013:MQR, author = "Ronny Lorenz and Stephan H. Bernhart and Jing Qin and Christian Honer zu Siederdissen and Andrea Tanzer and Fabian Amman and Ivo L. Hofacker and Peter F. Stadler", title = "{$2$D} Meets {$4$G}: {$G$}-Quadruplexes in {RNA} Secondary Structure Prediction", journal = j-TCBB, volume = "10", number = "4", pages = "832--844", month = jul, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.7", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jan 9 15:33:59 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "G-quadruplexes are abundant locally stable structural elements in nucleic acids. The combinatorial theory of RNA structures and the dynamic programming algorithms for RNA secondary structure prediction are extended here to incorporate G-quadruplexes using a simple but plausible energy model. With preliminary energy parameters, we find that the overwhelming majority of putative quadruplex-forming sequences in the human genome are likely to fold into canonical secondary structures instead. Stable G-quadruplexes are strongly enriched, however, in the 5$'$UTR of protein coding mRNAs.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Jaskowiak:2013:PMC, author = "Pablo A.
Jaskowiak and Ricardo J. G. B. Campello and Ivan G. Costa Filho", title = "Proximity Measures for Clustering Gene Expression Microarray Data: a Validation Methodology and a Comparative Analysis", journal = j-TCBB, volume = "10", number = "4", pages = "845--857", month = jul, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.9", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jan 9 15:33:59 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Cluster analysis is usually the first step adopted to unveil information from gene expression microarray data. Besides selecting a clustering algorithm, choosing an appropriate proximity measure (similarity or distance) is of great importance to achieve satisfactory clustering results. Nevertheless, up to date, there are no comprehensive guidelines concerning how to choose proximity measures for clustering microarray data. Pearson is the most used proximity measure, whereas characteristics of other ones remain unexplored. In this paper, we investigate the choice of proximity measures for the clustering of microarray data by evaluating the performance of 16 proximity measures in 52 data sets from time course and cancer experiments. Our results support that measures rarely employed in the gene expression literature can provide better results than commonly employed ones, such as Pearson, Spearman, and Euclidean distance. Given that different measures stood out for time course and cancer data evaluations, their choice should be specific to each scenario. To evaluate measures on time-course data, we preprocessed and compiled 17 data sets from the microarray literature in a benchmark along with a new methodology, called Intrinsic Biological Separation Ability (IBSA). 
Both can be employed in future research to assess the effectiveness of new measures for gene time-course data.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Panja:2013:CLC, author = "Surajit Panja and Sourav Patra and Anirban Mukherjee and Madhumita Basu and Sanghamitra Sengupta and Pranab K. Dutta", title = "A Closed-Loop Control Scheme for Steering Steady States of Glycolysis and Glycogenolysis Pathway", journal = j-TCBB, volume = "10", number = "4", pages = "858--868", month = jul, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.82", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jan 9 15:33:59 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Biochemical networks normally operate in the neighborhood of one of its multiple steady states. It may reach from one steady state to other within a finite time span. In this paper, a closed-loop control scheme is proposed to steer states of the glycolysis and glycogenolysis (GG) pathway from one of its steady states to other. The GG pathway is modeled in the synergism and saturation system formalism, known as S-system. This S-system model is linearized into the controllable Brunovsky canonical form using a feedback linearization technique. For closed-loop control, the linear-quadratic regulator (LQR) and the linear-quadratic Gaussian (LQG) regulator are invoked to design a controller for tracking prespecified steady states. In the feedback linearization technique, a global diffeomorphism function is proposed that facilitates in achieving the regulation requirement. 
The robustness of the regulated GG pathway is studied considering input perturbation and with measurement noise.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ozsoy:2013:DCA, author = "Oyku Eren Ozsoy and Tolga Can", title = "A Divide and Conquer Approach for Construction of Large-Scale Signaling Networks from {PPI} and {RNAi} Data Using Linear Programming", journal = j-TCBB, volume = "10", number = "4", pages = "869--883", month = jul, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.80", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jan 9 15:33:59 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Inference of topology of signaling networks from perturbation experiments is a challenging problem. Recently, the inference problem has been formulated as a reference network editing problem and it has been shown that finding the minimum number of edit operations on a reference network to comply with perturbation experiments is an NP-complete problem. In this paper, we propose an integer linear optimization (ILP) model for reconstruction of signaling networks from RNAi data and a reference network. The ILP model guarantees the optimal solution; however, is practical only for small signaling networks of size 10-15 genes due to computational complexity. To scale for large signaling networks, we propose a divide and conquer-based heuristic, in which a given reference network is divided into smaller subnetworks that are solved separately and the solutions are merged together to form the solution for the large network. 
We validate our proposed approach on real and synthetic data sets, and comparison with the state of the art shows that our proposed approach is able to scale better for large networks while attaining similar or better biological accuracy.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Nguyen:2013:KBM, author = "Ken D. Nguyen and Yi Pan", title = "A Knowledge-Based Multiple-Sequence Alignment Algorithm", journal = j-TCBB, volume = "10", number = "4", pages = "884--896", month = jul, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.102", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jan 9 15:33:59 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A common and cost-effective mechanism to identify the functionalities, structures, or relationships between species is multiple-sequence alignment, in which DNA/RNA/protein sequences are arranged and aligned so that similarities between sequences are clustered together. Correctly identifying and aligning these sequence biological similarities help from unwinding the mystery of species evolution to drug design. We present our knowledge-based multiple sequence alignment (KB-MSA) technique that utilizes the existing knowledge databases such as SWISSPROT, GENBANK, or HOMSTRAD to provide a more realistic and reliable sequence alignment. We also provide a modified version of this algorithm (CB-MSA) that utilizes the sequence consistency information when sequence knowledge databases are not available. 
Our benchmark tests on BAliBASE, PREFAB, HOMSTRAD, and SABMARK references show accuracy improvements up to 10 percent on twilight data sets against many leading alignment tools such as ISPALIGN, PADT, CLUSTALW, MAFFT, PROBCONS, and T-COFFEE.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Liu:2013:IAA, author = "Nan Liu and Haitao Jiang and Daming Zhu and Binhai Zhu", title = "An Improved Approximation Algorithm for Scaffold Filling to Maximize the Common Adjacencies", journal = j-TCBB, volume = "10", number = "4", pages = "905--913", month = jul, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.100", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jan 9 15:33:59 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Scaffold filling is a new combinatorial optimization problem in genome sequencing. The one-sided scaffold filling problem can be described as given an incomplete genome $ (I) $ and a complete (reference) genome $ (G) $, fill the missing genes into $ (I) $ such that the number of common (string) adjacencies between the resulting genome $ (I^{\prime }) $ and $ (G) $ is maximized. This problem is NP-complete for genome with duplicated genes and the best known approximation factor is 1.33, which uses a greedy strategy. In this paper, we prove a better lower bound of the optimal solution, and devise a new algorithm by exploiting the maximum matching method and a local improvement technique, which improves the approximation factor to 1.25. 
For genome with gene repetitions, this is the only known NP-complete problem which admits an approximation with a small constant factor (less than 1.5).", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Das:2013:ORS, author = "Mouli Das and C. A. Murthy and Rajat K. De", title = "An Optimization Rule for In Silico Identification of Targeted Overproduction in Metabolic Pathways", journal = j-TCBB, volume = "10", number = "4", pages = "914--926", month = jul, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.67", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jan 9 15:33:59 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In an extension of previous work, here we introduce a second-order optimization method for determining optimal paths from the substrate to a target product of a metabolic network, through which the amount of the target is maximum. An objective function for the said purpose, along with certain linear constraints, is considered and minimized. The basis vectors spanning the null space of the stoichiometric matrix, depicting the metabolic network, are computed, and their convex combinations satisfying the constraints are considered as flux vectors. A set of other constraints, incorporating weighting coefficients corresponding to the enzymes in the pathway, are considered. These weighting coefficients appear in the objective function to be minimized. During minimization, the values of these weighting coefficients are estimated and learned. These values, on minimization, represent an optimal pathway, depicting optimal enzyme concentrations, leading to overproduction of the target. The results on various networks demonstrate the usefulness of the methodology in the domain of metabolic engineering. 
A comparison with the standard gradient descent and the extreme pathway analysis technique is also performed. Unlike the gradient descent method, the present method, being independent of the learning parameter, exhibits improved results.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Luo:2013:ARA, author = "Chao Luo and Xingyuan Wang", title = "Algebraic Representation of Asynchronous Multiple-Valued Networks and Its Dynamics", journal = j-TCBB, volume = "10", number = "4", pages = "927--938", month = jul, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.112", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jan 9 15:33:59 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In this paper, dynamics of asynchronous multiple-valued networks (AMVNs) are investigated based on linear representation. By semitensor product of matrices, we convert AMVNs into the discrete-time linear representation. A general formula to calculate all of network transition matrices of a specific AMVN is achieved. A necessary and sufficient algebraic criterion to determine whether a given state belongs to loose attractors of length $ (s) $ is proposed. Formulas for the numbers of attractors in AMVNs are provided. Finally, algorithms are presented to detect all of the attractors and basins. Examples are shown to demonstrate the feasibility of the proposed scheme.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Halasz:2013:ASS, author = "Adam M. Halasz and Hong-Jian Lai and Meghan M. McCabe and Krishnan Radhakrishnan and Jeremy S. 
Edwards", title = "Analytical Solution of Steady-State Equations for Chemical Reaction Networks with Bilinear Rate Laws", journal = j-TCBB, volume = "10", number = "4", pages = "957--969", month = jul, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.41", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jan 9 15:33:59 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "True steady states are a rare occurrence in living organisms, yet their knowledge is essential for quasi-steady-state approximations, multistability analysis, and other important tools in the investigation of chemical reaction networks (CRN) used to describe molecular processes on the cellular level. Here, we present an approach that can provide closed form steady-state solutions to complex systems, resulting from CRN with binary reactions and mass-action rate laws. We map the nonlinear algebraic problem of finding steady states onto a linear problem in a higher-dimensional space. We show that the linearized version of the steady-state equations obeys the linear conservation laws of the original CRN. We identify two classes of problems for which complete, minimally parameterized solutions may be obtained using only the machinery of linear systems and a judicious choice of the variables used as free parameters. 
We exemplify our method, providing explicit formulae, on CRN describing signal initiation of two important types of RTK receptor-ligand systems, VEGF and EGF-ErbB1.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Todor:2013:CTP, author = "Andrei Todor and Alin Dobra and Tamer Kahveci", title = "Characterizing the Topology of Probabilistic Biological Networks", journal = j-TCBB, volume = "10", number = "4", pages = "970--983", month = jul, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.108", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jan 9 15:33:59 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Biological interactions are often uncertain events, that may or may not take place with some probability. This uncertainty leads to a massive number of alternative interaction topologies for each such network. The existing studies analyze the degree distribution of biological networks by assuming that all the given interactions take place under all circumstances. This strong and often incorrect assumption can lead to misleading results. In this paper, we address this problem and develop a sound mathematical basis to characterize networks in the presence of uncertain interactions. Using our mathematical representation, we develop a method that can accurately describe the degree distribution of such networks. We also take one more step and extend our method to accurately compute the joint-degree distributions of node pairs connected by edges. The number of possible network topologies grows exponentially with the number of uncertain interactions. However, the mathematical model we develop allows us to compute these degree distributions in polynomial time in the number of interactions. 
Our method works quickly even for entire protein-protein interaction (PPI) networks. It also helps us find an adequate mathematical model using MLE. We perform a comparative study of node-degree and joint-degree distributions in two types of biological networks: the classical deterministic networks and the more flexible probabilistic networks. Our results confirm that power-law and log-normal models best describe degree distributions for both probabilistic and deterministic networks. Moreover, the inverse correlation of degrees of neighboring nodes shows that, in probabilistic networks, nodes with large number of interactions prefer to interact with those with small number of interactions more frequently than expected. We also show that probabilistic networks are more robust for node-degree distribution computation than the deterministic ones. Availability: all the data sets used, the software implemented and the alignments found in this paper are available at http://bioinformatics.cise.ufl.edu/projects/probNet/.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Torres-Sanchez:2013:GFG, author = "Sergio Torres-Sanchez and Nuria Medina-Medina and Chris Gignoux and Maria del Mar Abad-Grau and Esteban Gonzalez-Burchard", title = "{GeneOnEarth}: Fitting Genetic {PC} Plots on the Globe", journal = j-TCBB, volume = "10", number = "4", pages = "1009--1016", month = jul, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.81", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jan 9 15:33:59 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Principal component (PC) plots have become widely used to summarize genetic variation of individuals in a sample. 
The similarity between genetic distance in PC plots and geographical distance has been shown to be quite impressive. However, in most situations, individual ancestral origins are not precisely known or they are heterogeneously distributed; hence, they are hardly linked to a geographical area. We have developed GeneOnEarth, a user-friendly web-based tool to help geneticists to understand whether a linear isolation-by-distance model may apply to a genetic data set; thus, genetic distances among a set of individuals resemble geographical distances among their origins. Its main goal is to allow users to first apply a by-view Procrustes method to visually learn whether this model holds. To do that, the user can choose the exact geographical area from an online 2D or 3D world map by using, respectively, Google Maps or Google Earth, and rotate, flip, and resize the images. GeneOnEarth can also compute the optimal rotation angle using Procrustes analysis and assess statistical evidence of similarity when a different rotation angle has been chosen by the user.
An online version of GeneOnEarth is available for testing and using purposes at http://bios.ugr.es/GeneOnEarth.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhu:2013:IDB, author = "Yuan Zhu and Weiqiang Zhou and Dao-Qing Dai and Hong Yan", title = "Identification of {DNA}-Binding and Protein-Binding Proteins Using Enhanced Graph Wavelet Features", journal = j-TCBB, volume = "10", number = "4", pages = "1017--1031", month = jul, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.117", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jan 9 15:33:59 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Interactions between biomolecules play an essential role in various biological processes. For predicting DNA-binding or protein-binding proteins, many machine-learning-based techniques have used various types of features to represent the interface of the complexes, but they only deal with the properties of a single atom in the interface and do not take into account the information of neighborhood atoms directly. This paper proposes a new feature representation method for biomolecular interfaces based on the theory of graph wavelet. The enhanced graph wavelet features (EGWF) provides an effective way to characterize interface feature through adding physicochemical features and exploiting a graph wavelet formulation. Particularly, graph wavelet condenses the information around the center atom, and thus enhances the discrimination of features of biomolecule binding proteins in the feature space. 
Experiment results show that EGWF performs effectively for predicting
                 DNA-binding and protein-binding proteins in terms of
                 Matthew's correlation coefficient (MCC) score and the
                 area value under the receiver operating characteristic
                 curve (AUC).",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Angione:2013:POO,
  author =       "Claudio Angione and Giovanni Carapezza and Jole
                 Costanza and Pietro Lio and Giuseppe Nicosia",
  title =        "{Pareto} Optimality in Organelle Energy Metabolism
                 Analysis",
  journal =      j-TCBB,
  volume =       "10",
  number =       "4",
  pages =        "1032--1044",
  month =        jul,
  year =         "2013",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2013.95",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Jan 9 15:33:59 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "In low and high eukaryotes, energy is collected or
                 transformed in compartments, the organelles. The rich
                 variety of size, characteristics, and density of the
                 organelles makes it difficult to build a general
                 picture. In this paper, we make use of the Pareto-front
                 analysis to investigate the optimization of energy
                 metabolism in mitochondria and chloroplasts. Using the
                 Pareto optimality principle, we compare models of
                 organelle metabolism on the basis of single- and
                 multiobjective optimization, approximation techniques
                 (the Bayesian Automatic Relevance Determination),
                 robustness, and pathway sensitivity analysis. Finally,
                 we report the first analysis of the metabolic model for
                 the hydrogenosome of \bioname{Trichomonas vaginalis},
                 which is found in several protozoan parasites. Our
                 analysis has shown the importance of the Pareto
                 optimality for such comparison and for insights into
                 the evolution of the metabolism from cytoplasmic to
                 organelle bound, involving a model order reduction.
We report that Pareto fronts represent an asymptotic analysis useful to
                 describe the metabolism of an organism aimed at
                 maximizing concurrently two or more metabolite
                 concentrations.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Requeno:2013:TLP,
  author =       "Jose Ignacio Requeno and Gregorio de Miguel Casado and
                 Roberto Blanco and Jose Manuel Colom",
  title =        "Temporal Logics for Phylogenetic Analysis via Model
                 Checking",
  journal =      j-TCBB,
  volume =       "10",
  number =       "4",
  pages =        "1058--1070",
  month =        jul,
  year =         "2013",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2013.87",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Jan 9 15:33:59 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "The need for general-purpose algorithms for studying
                 biological properties in phylogenetics motivates
                 research into formal verification frameworks.
                 Researchers can focus their efforts exclusively on
                 evolution trees and property specifications. To this
                 end, model checking, a mature automated verification
                 technique originating in computer science, is applied
                 to phylogenetic analysis. Our approach is based on
                 three cornerstones: a logical modeling of the evolution
                 with transition systems; the specification of both
                 phylogenetic properties and trees using flexible
                 temporal logic formulas; and the verification of the
                 latter by means of automated computer tools. The most
                 conspicuous result is the inception of a formal
                 framework which allows for a symbolic manipulation of
                 biological data (based on the codification of the
                 taxa).
Additionally, different logical models of evolution can be considered,
                 complex properties can be specified in terms of the
                 logical composition of others, and the refinement of
                 unfulfilled properties as well as the discovery of new
                 properties can be undertaken by exploiting the
                 verification results. Some experimental results using a
                 symbolic model verifier support the feasibility of the
                 approach.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Rajabli:2013:ADS,
  author =       "Farid Rajabli and Unal Goktas and Gul Inan",
  title =        "Application of {Dempster--Schafer} Method in
                 Family-Based Association Studies",
  journal =      j-TCBB,
  volume =       "10",
  number =       "4",
  pages =        "1071--1075",
  month =        jul,
  year =         "2013",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2013.85",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Jan 9 15:33:59 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "In experiments designed for family-based association
                 studies, methods such as transmission disequilibrium
                 test require large number of trios to identify
                 single-nucleotide polymorphisms associated with the
                 disease. However, unavailability of a large number of
                 trios is the Achilles' heel of many complex diseases,
                 especially for late-onset diseases. In this paper, we
                 propose a novel approach to this problem by means of
                 the Dempster-Shafer method.
The simulation studies show that the Dempster-Shafer method has a
                 promising overall performance, in identifying
                 single-nucleotide polymorphisms in the correct
                 association class, as it has 90 percent accuracy even
                 with 60 trios.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

%%% In the abstract below, the inline math was extracted as ``$ (n) $'',
%%% which typesets as ``(n)-leaf trees''; it is written as plain $n$.
@Article{Gordon:2013:HWP,
  author =       "Kevaughn Gordon and Eric Ford and Katherine {St.
                 John}",
  title =        "{Hamiltonian} Walks of Phylogenetic Treespaces",
  journal =      j-TCBB,
  volume =       "10",
  number =       "4",
  pages =        "1076--1079",
  month =        jul,
  year =         "2013",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2013.105",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Jan 9 15:33:59 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "We answer Bryant's combinatorial challenge on minimal
                 walks of phylogenetic treespace under the
                 nearest-neighbor interchange (NNI) metric. We show that
                 the shortest path through the NNI-treespace of
                 $n$-leaf trees is Hamiltonian for all $n$.
That is, there is a minimal path that visits all binary trees exactly
                 once, under NNI moves.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Yu:2013:HCH,
  author =       "Tianwei Yu and Hesen Peng",
  title =        "Hierarchical Clustering of High-Throughput Expression
                 Data Based on General Dependences",
  journal =      j-TCBB,
  volume =       "10",
  number =       "4",
  pages =        "1080--1085",
  month =        jul,
  year =         "2013",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2013.99",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Jan 9 15:33:59 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "High-throughput expression technologies, including
                 gene expression array and liquid chromatography--mass
                 spectrometry (LC-MS) and so on, measure thousands of
                 features, i.e., genes or metabolites, on a continuous
                 scale. In such data, both linear and nonlinear
                 relations exist between features. Nonlinear relations
                 can reflect critical regulation patterns in the
                 biological system. However, they are not identified and
                 utilized by traditional clustering methods based on
                 linear associations. Clustering based on general
                 dependences, i.e., both linear and nonlinear relations,
                 is hampered by the high dimensionality and high noise
                 level of the data. We developed a sensitive
                 nonparametric measure of general dependence between
                 (groups of) random variables in high dimensions. Based
                 on this dependence measure, we developed a hierarchical
                 clustering method. In simulation studies, the method
                 outperformed correlation- and mutual information
                 (MI)-based hierarchical clustering methods in
                 clustering features with nonlinear dependences. We
                 applied the method to a microarray data set measuring
                 the gene expression in cell-cycle time series to show
                 it generates biologically relevant results.
The R code is available at
                 http://userwww.service.emory.edu/~tyu8/GDHC.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Anonymous:2013:CPSc,
  author =       "Anonymous",
  title =        "Call for Papers: Special Issue on Software and
                 Databases in {TCBB}",
  journal =      j-TCBB,
  volume =       "10",
  number =       "4",
  pages =        "1086--1086",
  month =        jul,
  year =         "2013",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2013.154",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Jan 9 15:33:59 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

%%% NOTE(review): the page range 1--1 in the next entry looks like a
%%% placeholder: the neighbouring volume 10, number 5 entries begin at
%%% page 1089.  TODO: confirm the real page range with the publisher.
@Article{Liu:2013:NBC,
  author =       "Kuan-Liang Liu and Tzu-Tsung Wong",
  title =        "Na{\"\i}ve {Bayesian} Classifiers with Multinomial
                 Models for {rRNA} Taxonomic Assignment",
  journal =      j-TCBB,
  volume =       "10",
  number =       "5",
  pages =        "1--1",
  month =        sep,
  year =         "2013",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2013.114",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Jan 9 15:34:03 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "The introduction of next generation sequencing in
                 ecological studies has created a major revolution in
                 microbial and fungal ecology. Direct sequencing of
                 hypervariable regions from ribosomal RNA genes can
                 provide rapid and inexpensive analysis for ecological
                 communities. In order to get deep understanding from
                 these rRNA fragments, the Ribosomal Database Project
                 developed the 'RDP Classifier' utilizing 8-mer
                 nucleotide frequencies with Bayesian theorem to obtain
                 taxonomy affiliation.
The classifier is computationally efficient and works well with massive
                 short sequences. However, the binary model employed in
                 the RDP classifier does not consider the repetitive
                 8-mers in each reference sequence. Previous studies
                 have pointed out that multinomial model usually results
                 a better performance than binary model. In this study,
                 we present the na{\"\i}ve Bayesian classifiers with
                 multinomial models that take repetitive 8-mers into
                 account for classifying microbial 16S and fungal 28S
                 rRNA sequences. The results obtained from the
                 multinomial approach were compared with those obtained
                 from the binomial RDP classifier by 250-bp, 400-bp,
                 800-bp, and full-length reads to demonstrate that the
                 multinomial approach can generally achieve a higher
                 prediction accuracy in most hypervariable regions.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Kahveci:2013:GEA,
  author =       "Tamer Kahveci and Mona Singh",
  title =        "Guest Editorial for {ACM BCB}",
  journal =      j-TCBB,
  volume =       "10",
  number =       "5",
  pages =        "1089--1090",
  month =        sep,
  year =         "2013",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2013.182",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Jan 9 15:34:03 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Kuroshu:2013:NCP,
  author =       "Reginaldo M.
Kuroshu",
  title =        "Nonoverlapping Clone Pooling for High-Throughput
                 Sequencing",
  journal =      j-TCBB,
  volume =       "10",
  number =       "5",
  pages =        "1091--1097",
  month =        sep,
  year =         "2013",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2013.83",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Jan 9 15:34:03 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Simultaneously sequencing multiple clones using
                 second-generation sequencers can speed up many
                 essential clone-based sequencing methods. However, in
                 applications such as fosmid clone sequencing and
                 full-length cDNA sequencing, it is important to create
                 pools of clones that do not overlap on the genome for
                 the identification of structural variations and
                 alternatively spliced transcripts, respectively. We
                 define the nonoverlapping clone pooling problem and
                 provide practical solutions based on optimal graph
                 coloring and bin-packing algorithms with constant
                 absolute worst-case ratios, and further extend them to
                 cope with repetitive mappings. Using theoretical
                 analysis and experiments, we also show that the
                 proposed methods are applicable.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Hossain:2013:IMS,
  author =       "K. S. M. Tozammel Hossain and Debprakash Patnaik and
                 Srivatsan Laxman and Prateek Jain and Chris
                 Bailey-Kellogg and Naren Ramakrishnan",
  title =        "Improved Multiple Sequence Alignments Using Coupled
                 Pattern Mining",
  journal =      j-TCBB,
  volume =       "10",
  number =       "5",
  pages =        "1098--1112",
  month =        sep,
  year =         "2013",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2013.36",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Jan 9 15:34:03 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "We present alignment refinement by mining coupled
                 residues (ARMiCoRe), a novel approach to a classical
                 bioinformatics problem, viz., multiple sequence
                 alignment (MSA) of gene and protein sequences. Aligning
                 multiple biological sequences is a key step in
                 elucidating evolutionary relationships, annotating
                 newly sequenced segments, and understanding the
                 relationship between biological sequences and
                 functions. Classical MSA algorithms are designed to
                 primarily capture conservations in sequences whereas
                 couplings, or correlated mutations, are well known as
                 an additional important aspect of sequence evolution.
                 (Two sequence positions are coupled when mutations in
                 one are accompanied by compensatory mutations in
                 another). As a result, better exposition of couplings
                 is sometimes one of the reasons for hand-tweaking of
                 MSAs by practitioners. ARMiCoRe introduces a distinctly
                 pattern mining approach to improving MSAs: using
                 frequent episode mining as a foundational basis, we
                 define the notion of a coupled pattern and demonstrate
                 how the discovery and tiling of coupled patterns using
                 a max-flow approach can yield MSAs that are better than
                 conservation-based alignments. Although we were
                 motivated to improve MSAs for the sake of better
                 exposing couplings, we demonstrate that our MSAs are
                 also improvements in terms of traditional metrics of
                 assessment.
We demonstrate the effectiveness of ARMiCoRe on a large collection of
                 data sets.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

%%% The species name in the abstract below is wrapped in \bioname{}
%%% (defined in the @Preamble), like other organism names in this file.
@Article{Rahman:2013:REM,
  author =       "Ahsanur Rahman and Christopher L. Poirel and David J.
                 Badger and Craig Estep and T. M. Murali",
  title =        "Reverse Engineering Molecular Hypergraphs",
  journal =      j-TCBB,
  volume =       "10",
  number =       "5",
  pages =        "1113--1124",
  month =        sep,
  year =         "2013",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2013.71",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Jan 9 15:34:03 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Analysis of molecular interaction networks is
                 pervasive in systems biology. This research relies
                 almost entirely on graphs for modeling interactions.
                 However, edges in graphs cannot represent multiway
                 interactions among molecules, which occur very often
                 within cells. Hypergraphs may be better representations
                 for networks having such interactions, since hyperedges
                 can naturally represent relationships among multiple
                 molecules. Here, we propose using hypergraphs to
                 capture the uncertainty inherent in reverse engineering
                 gene-gene networks. Some subsets of nodes may induce
                 highly varying subgraphs across an ensemble of networks
                 inferred by a reverse engineering algorithm. We provide
                 a novel formulation of hyperedges to capture this
                 uncertainty in network topology. We propose a
                 clustering-based approach to discover hyperedges. We
                 show that our approach can recover hyperedges planted
                 in synthetic data sets with high precision and recall,
                 even for moderate amount of noise. We apply our
                 techniques to a data set of pathways inferred from
                 genetic interaction data in \bioname{S. cerevisiae}
                 related to the unfolded protein response.
Our approach discovers several hyperedges that capture the uncertain
                 connectivity of genes in relevant protein complexes,
                 suggesting that further experiments may be required to
                 precisely discern their interaction patterns. We also
                 show that these complexes are not discovered by an
                 algorithm that computes frequent and dense subgraphs.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Caglar:2013:SMS,
  author =       "Mehmet Umut Caglar and Ranadip Pal",
  title =        "Stochastic Model Simulation Using {Kronecker} Product
                 Analysis and {Zassenhaus} Formula Approximation",
  journal =      j-TCBB,
  volume =       "10",
  number =       "5",
  pages =        "1125--1136",
  month =        sep,
  year =         "2013",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2013.34",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Jan 9 15:34:03 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Probabilistic Models are regularly applied in Genetic
                 Regulatory Network modeling to capture the stochastic
                 behavior observed in the generation of biological
                 entities such as mRNA or proteins. Several approaches
                 including Stochastic Master Equations and Probabilistic
                 Boolean Networks have been proposed to model the
                 stochastic behavior in genetic regulatory networks. It
                 is generally accepted that Stochastic Master Equation
                 is a fundamental model that can describe the system
                 being investigated in fine detail, but the application
                 of this model is computationally enormously expensive.
                 On the other hand, Probabilistic Boolean Network
                 captures only the coarse-scale stochastic properties of
                 the system without modeling the detailed interactions.
We propose a new approximation of the stochastic master equation model
                 that is able to capture the finer details of the
                 modeled system including bistabilities and oscillatory
                 behavior, and yet has a significantly lower
                 computational complexity. In this new method, we
                 represent the system using tensors and derive an
                 identity to exploit the sparse connectivity of
                 regulatory targets for complexity reduction. The
                 algorithm involves an approximation based on Zassenhaus
                 formula to represent the exponential of a sum of
                 matrices as product of matrices. We derive upper bounds
                 on the expected error of the proposed model
                 distribution as compared to the stochastic master
                 equation model distribution. Simulation results of the
                 application of the model to four different biological
                 benchmark systems illustrate performance comparable to
                 detailed stochastic master equation models but with
                 considerably lower computational complexity. The
                 results also demonstrate the reduced complexity of the
                 new approach as compared to commonly used Stochastic
                 Simulation Algorithm for equivalent accuracy.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Tsai:2013:CBM,
  author =       "Ming-Chi Tsai and Guy E. Blelloch and R. Ravi and
                 Russell Schwartz",
  title =        "Coalescent-Based Method for Learning Parameters of
                 Admixture Events from Large-Scale Genetic Variation
                 Data",
  journal =      j-TCBB,
  volume =       "10",
  number =       "5",
  pages =        "1137--1149",
  month =        sep,
  year =         "2013",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2013.98",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Jan 9 15:34:03 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Detecting and quantifying the timing and the genetic
                 contributions of parental populations to a hybrid
                 population is an important but challenging problem in
                 reconstructing evolutionary histories from genetic
                 variation data. With the advent of high throughput
                 genotyping technologies, new methods suitable for
                 large-scale data are especially needed. Furthermore,
                 existing methods typically assume the assignment of
                 individuals into subpopulations is known, when that
                 itself is a difficult problem often unresolved for real
                 data. Here, we propose a novel method that combines
                 prior work for inferring nonreticulate population
                 structures with an MCMC scheme for sampling over
                 admixture scenarios to both identify population
                 assignments and learn divergence times and admixture
                 proportions for those populations using genome-scale
                 admixed genetic variation data. We validated our method
                 using coalescent simulations and a collection of real
                 bovine and human variation data. On simulated
                 sequences, our methods show better accuracy and faster
                 runtime than leading competitive methods in estimating
                 admixture fractions and divergence times.
Analysis on the real data further shows our methods to be effective at
                 matching our best current knowledge about the relevant
                 populations.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Tataw:2013:QAL,
  author =       "Oben M. Tataw and Gonehal Venugopala Reddy and Eamonn
                 J. Keogh and Amit K. Roy-Chowdhury",
  title =        "Quantitative Analysis of Live-Cell Growth at the Shoot
                 Apex of \bioname{Arabidopsis thaliana}: Algorithms for
                 Feature Measurement and Temporal Alignment",
  journal =      j-TCBB,
  volume =       "10",
  number =       "5",
  pages =        "1150--1161",
  month =        sep,
  year =         "2013",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2013.64",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Jan 9 15:34:03 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Study of the molecular control of organ growth
                 requires establishment of the causal relationship
                 between gene expression and cell behaviors. We seek to
                 understand this relationship at the shoot apical
                 meristem (SAM) of model plant \bioname{Arabidopsis
                 thaliana}. This requires the spatial mapping and
                 temporal alignment of different functional domains into
                 a single template. Live-cell imaging techniques allow
                 us to observe real-time organ primordia growth and gene
                 expression dynamics at cellular resolution. In this
                 paper, we propose a framework for the measurement of
                 growth features at the 3D reconstructed surface of
                 organ primordia, as well as algorithms for robust time
                 alignment of primordia. We computed areas and
                 deformation values from reconstructed 3D surfaces of
                 individual primordia from live-cell imaging data. Based
                 on these growth measurements, we applied a multiple
                 feature landscape matching (LAM-M) algorithm to ensure
                 a reliable temporal alignment of multiple primordia.
Although the original landscape matching (LAM) algorithm motivated our
                 alignment approach, it sometimes fails to properly
                 align growth curves in the presence of high
                 noise/distortion. To overcome this shortcoming, we
                 modified the cost function to consider the landscape of
                 the corresponding growth features. We also present an
                 alternate parameter-free growth alignment algorithm
                 which performs as well as LAM-M for high-quality data,
                 but is more robust to the presence of outliers or
                 noise. Results on primordia and guppy evolutionary
                 growth data show that the proposed alignment framework
                 performs at least as well as the LAM algorithm in the
                 general case, and significantly better in the case of
                 increased noise.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Molloy:2013:PSE,
  author =       "Kevin Molloy and Sameh Saleh and Amarda Shehu",
  title =        "Probabilistic Search and Energy Guidance for Biased
                 Decoy Sampling in Ab Initio Protein Structure
                 Prediction",
  journal =      j-TCBB,
  volume =       "10",
  number =       "5",
  pages =        "1162--1175",
  month =        sep,
  year =         "2013",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2013.29",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Jan 9 15:34:03 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Adequate sampling of the conformational space is a
                 central challenge in ab initio protein structure
                 prediction. In the absence of a template structure, a
                 conformational search procedure guided by an energy
                 function explores the conformational space, gathering
                 an ensemble of low-energy decoy conformations. If the
                 sampling is inadequate, the native structure may be
                 missed altogether.
Even if reproduced, a subsequent stage that selects a subset of decoys
                 for further structural detail and energetic refinement
                 may discard near-native decoys if they are high energy
                 or insufficiently represented in the ensemble. Sampling
                 should produce a decoy ensemble that facilitates the
                 subsequent selection of near-native decoys. In this
                 paper, we investigate a robotics-inspired framework
                 that allows directly measuring the role of energy in
                 guiding sampling. Testing demonstrates that a soft
                 energy bias steers sampling toward a diverse decoy
                 ensemble less prone to exploiting energetic artifacts
                 and thus more likely to facilitate retainment of
                 near-native conformations by selection techniques. We
                 employ two different energy functions, the associative
                 memory Hamiltonian with water and Rosetta. Results show
                 that enhanced sampling provides a rigorous testing of
                 energy functions and exposes different deficiencies in
                 them, thus promising to guide development of more
                 accurate representations and energy functions.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Huang:2013:EEO,
  author =       "Yao-ming Huang and Chris Bystroff",
  title =        "Expanded Explorations into the Optimization of an
                 Energy Function for Protein Design",
  journal =      j-TCBB,
  volume =       "10",
  number =       "5",
  pages =        "1176--1187",
  month =        sep,
  year =         "2013",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2013.113",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Jan 9 15:34:03 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Nature possesses a secret formula for the energy as a
                 function of the structure of a protein. In protein
                 design, approximations are made to both the structural
                 representation of the molecule and to the form of the
                 energy equation, such that the existence of a general
                 energy function for proteins is by no means guaranteed.
                 Here, we present new insights toward the application of
                 machine learning to the problem of finding a general
                 energy function for protein design. Machine learning
                 requires the definition of an objective function, which
                 carries with it the implied definition of success in
                 protein design. We explored four functions, consisting
                 of two functional forms, each with two criteria for
                 success. Optimization was carried out by a Monte Carlo
                 search through the space of all variable parameters.
                 Cross-validation of the optimized energy function
                 against a test set gave significantly different results
                 depending on the choice of objective function, pointing
                 to relative correctness of the built-in assumptions.
                 Novel energy cross terms correct for the observed
                 nonadditivity of energy terms and an imbalance in the
                 distribution of predicted amino acids.
This paper expands on the work presented at the 2012 ACM-BCB.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Blumenthal:2013:IIR,
  author =       "Seth Blumenthal and Yisheng Tang and Wenjie Yang and
                 Brian Chen",
  title =        "Isolating Influential Regions of Electrostatic
                 Focusing in Protein and {DNA} Structure",
  journal =      j-TCBB,
  volume =       "10",
  number =       "5",
  pages =        "1188--1198",
  month =        sep,
  year =         "2013",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2013.124",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Jan 9 15:34:03 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Electrostatic focusing is a general phenomenon that
                 occurs in cavities and grooves on the molecular surface
                 of biomolecules. Narrow surface features can partially
                 shield charged atoms from the high-dielectric solvent,
                 enhancing electrostatic potentials inside the cavity
                 and projecting electric field lines outward into the
                 solvent. This effect has been observed in many
                 instances and is widely considered in the human
                 examination of molecular structure, but it is rarely
                 integrated into the digital representations used in
                 protein structure comparison software. To create a
                 computational representation of electrostatic focusing,
                 that is compatible with structure comparison
                 algorithms, this paper presents an approach that
                 generates three-dimensional solids that approximate
                 regions where focusing occurs. We verify the accuracy
                 of this representation against instances of focusing in
                 proteins and DNA. Noting that this representation also
                 identifies thin focusing regions on the molecular
                 surface that are unlikely to affect binding, we
                 describe a second algorithm that conservatively
                 isolates larger focusing regions.
The resulting 3D solids can be compared with Boolean set operations,
                 permitting a new range of analyses on the regions where
                 electrostatic focusing occurs. They also represent a
                 novel integration of molecular shape and electrostatic
                 focusing into the same structure comparison
                 framework.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Kuksa:2013:BSC,
  author =       "Pavel P. Kuksa",
  title =        "Biological Sequence Classification with Multivariate
                 String Kernels",
  journal =      j-TCBB,
  volume =       "10",
  number =       "5",
  pages =        "1201--1210",
  month =        sep,
  year =         "2013",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2013.15",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Jan 9 15:34:03 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "String kernel-based machine learning methods have
                 yielded great success in practical tasks of
                 structured/sequential data analysis. They often exhibit
                 state-of-the-art performance on many practical tasks of
                 sequence analysis such as biological sequence
                 classification, remote homology detection, or protein
                 superfamily and fold prediction. However, typical
                 string kernel methods rely on the analysis of discrete
                 1D string data (e.g., DNA or amino acid sequences). In
                 this paper, we address the multiclass biological
                 sequence classification problems using multivariate
                 representations in the form of sequences of features
                 vectors (as in biological sequence profiles, or
                 sequences of individual amino acid physicochemical
                 descriptors) and a class of multivariate string kernels
                 that exploit these representations.
On three protein sequence classification tasks, the proposed multivariate representations and kernels show significant 15-20 percent improvements compared to existing state-of-the-art sequence classification methods.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Bleik:2013:TCB, author = "Said Bleik and Meenakshi Mishra and Jun Huan and Min Song", title = "Text Categorization of Biomedical Data Sets Using Graph Kernels and a Controlled Vocabulary", journal = j-TCBB, volume = "10", number = "5", pages = "1211--1217", month = sep, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.16", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jan 9 15:34:03 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Recently, graph representations of text have been showing improved performance over conventional bag-of-words representations in text categorization applications. In this paper, we present a graph-based representation for biomedical articles and use graph kernels to classify those articles into high-level categories. In our representation, common biomedical concepts and semantic relationships are identified with the help of an existing ontology and are used to build a rich graph structure that provides a consistent feature set and preserves additional semantic information that could improve a classifier's performance. We attempt to classify the graphs using both a set-based graph kernel that is capable of dealing with the disconnected nature of the graphs and a simple linear kernel. 
Finally, we report the results comparing the classification performance of the kernel classifiers to common text-based classifiers.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Yan:2013:CNE, author = "Su Yan and W. Scott Spangler and Ying Chen", title = "Chemical Name Extraction Based on Automatic Training Data Generation and Rich Feature Set", journal = j-TCBB, volume = "10", number = "5", pages = "1218--1233", month = sep, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.101", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jan 9 15:34:03 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The automation of extracting chemical names from text has significant value to biomedical and life science research. A major barrier in this task is the difficulty of getting a sizable and good quality data to train a reliable entity extraction model. Another difficulty is the selection of informative features of chemical names, since comprehensive domain knowledge on chemistry nomenclature is required. Leveraging random text generation techniques, we explore the idea of automatically creating training sets for the task of chemical name extraction. Assuming the availability of an incomplete list of chemical names, called a dictionary, we are able to generate well-controlled, random, yet realistic chemical-like training documents. We statistically analyze the construction of chemical names based on the incomplete dictionary, and propose a series of new features, without relying on any domain knowledge. Compared to state-of-the-art models learned from manually labeled data and domain knowledge, our solution shows better or comparable results in annotating real-world data with less human effort. 
Moreover, we report an interesting observation about the language for chemical names. That is, both the structural and semantic components of chemical names follow a Zipfian distribution, which resembles many natural languages.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Florea:2013:GGT, author = "Liliana D. Florea and Steven L. Salzberg", title = "Genome-Guided Transcriptome Assembly in the Age of Next-Generation Sequencing", journal = j-TCBB, volume = "10", number = "5", pages = "1234--1240", month = sep, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.140", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jan 9 15:34:03 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Next generation sequencing technologies provide unprecedented power to explore the repertoire of genes and their alternative splice variants, collectively defining the transcriptome of a species in great detail. However, assembling the short reads into full-length gene and transcript models presents significant computational challenges. We review current algorithms for assembling transcripts and genes from next generation sequencing reads aligned to a reference genome, and lay out areas for future improvements.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Shakya:2013:AWL, author = "Devendra K. Shakya and Rajiv Saxena and Sanjeev N. 
Sharma", title = "An Adaptive Window Length Strategy for Eukaryotic {CDS} Prediction", journal = j-TCBB, volume = "10", number = "5", pages = "1241--1252", month = sep, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.76", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jan 9 15:34:03 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Signal processing-based algorithms for identification of coding sequences (CDS) in eukaryotes are non-data driven and exploit the presence of three-base periodicity in these regions for their detection. Three-base periodicity is commonly detected using short time Fourier transform (STFT) that uses a window function of fixed length. As the length of the protein coding and noncoding regions varies widely, the identification accuracy of STFT-based algorithms is poor. In this paper, a novel signal processing-based algorithm is developed by enabling the window length adaptation in STFT of DNA sequences for improving the identification of three-base periodicity. The length of the window function has been made adaptive in coding regions to maximize the magnitude of period-3 measure, whereas in the noncoding regions, the window length is tailored to minimize this measure. Simulation results on bench mark data sets demonstrate the advantage of this algorithm when compared with other non-data-driven methods for CDS prediction.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Rosenberg:2013:CHC, author = "Noah A. 
Rosenberg", title = "Coalescent Histories for Caterpillar-Like Families", journal = j-TCBB, volume = "10", number = "5", pages = "1253--1262", month = sep, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.123", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jan 9 15:34:03 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A coalescent history is an assignment of branches of a gene tree to branches of a species tree on which coalescences in the gene tree occur. The number of coalescent histories for a pair consisting of a labeled gene tree topology and a labeled species tree topology is important in gene tree probability computations, and more generally, in studying evolutionary possibilities for gene trees on species trees. Defining the $ (T_r) $-caterpillar-like family as a sequence of $ (n) $-taxon trees constructed by replacing the $ (r) $-taxon subtree of $ (n) $-taxon caterpillars by a specific $ (r) $-taxon labeled topology $ (T_r) $, we examine the number of coalescent histories for caterpillar-like families with matching gene tree and species tree labeled topologies. For each $ (T_r) $ with size $ (r \le 8) $, we compute the number of coalescent histories for $ (n) $-taxon trees in the $ (T_r) $-caterpillar-like family. Next, as $ (n \rightarrow \infty) $, we find that the limiting ratio of the numbers of coalescent histories for the $ (T_r) $ family and caterpillars themselves is correlated with the number of labeled histories for $ (T_r) $. 
The results support a view that large numbers of coalescent histories occur when a tree has both a relatively balanced subtree and a high tree depth, contributing to deeper understanding of the combinatorics of gene trees and species trees.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wandelt:2013:FRC, author = "Sebastian Wandelt and Ulf Leser", title = "{FRESCO}: Referential Compression of Highly Similar Sequences", journal = j-TCBB, volume = "10", number = "5", pages = "1275--1288", month = sep, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.122", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jan 9 15:34:03 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/datacompression.bib; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In many applications, sets of similar texts or sequences are of high importance. Prominent examples are revision histories of documents or genomic sequences. Modern high-throughput sequencing technologies are able to generate DNA sequences at an ever-increasing rate. In parallel to the decreasing experimental time and cost necessary to produce DNA sequences, computational requirements for analysis and storage of the sequences are steeply increasing. Compression is a key technology to deal with this challenge. Recently, referential compression schemes, storing only the differences between a to-be-compressed input and a known reference sequence, gained a lot of interest in this field. In this paper, we propose a general open-source framework to compress large amounts of biological sequence data called Framework for REferential Sequence COmpression (FRESCO). 
Our basic compression algorithm is shown to be one to two orders of magnitudes faster than comparable related work, while achieving similar compression ratios. We also propose several techniques to further increase compression ratios, while still retaining the advantage in speed: (1) selecting a good reference sequence; and (2) rewriting a reference sequence to allow for better compression. In addition, we propose a new way of further boosting the compression ratios by applying referential compression to already referentially compressed files (second-order compression). This technique allows for compression ratios way beyond state of the art, for instance, 4,000:1 and higher for human genomes. We evaluate our algorithms on a large data set from three different species (more than 1,000 genomes, more than 3 TB) and on a collection of versions of Wikipedia pages. Our results show that real-time compression of highly similar sequences at high compression ratios is possible on modern hardware.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{AlNasr:2013:IBS, author = "Kamal {Al Nasr} and Chunmei Liu and Mugizi Rwebangira and Legand Burge and Jing He", title = "Intensity-Based Skeletonization of {CryoEM} Gray-Scale Images Using a True Segmentation-Free Algorithm", journal = j-TCBB, volume = "10", number = "5", pages = "1289--1298", month = sep, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.121", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jan 9 15:34:03 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Cryo-electron microscopy is an experimental technique that is able to produce 3D gray-scale images of protein molecules. 
In contrast to other experimental techniques, cryo-electron microscopy is capable of visualizing large molecular complexes such as viruses and ribosomes. At medium resolution, the positions of the atoms are not visible and the process cannot proceed. The medium-resolution images produced by cryo-electron microscopy are used to derive the atomic structure of the proteins in de novo modeling. The skeletons of the 3D gray-scale images are used to interpret important information that is helpful in de novo modeling. Unfortunately, not all features of the image can be captured using a single segmentation. In this paper, we present a segmentation-free approach to extract the gray-scale curve-like skeletons. The approach relies on a novel representation of the 3D image, where the image is modeled as a graph and a set of volume trees. A test containing 36 synthesized maps and one authentic map shows that our approach can improve the performance of the two tested tools used in de novo modeling. The improvements were 62 and 13 percent for Gorgon and DP-TOSS, respectively.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Chuang:2013:OPU, author = "Li-Yeh Chuang and Cheng-Huei Yang and Jui-Hung Tsai and Cheng-Hong Yang", title = "Operon Prediction Using Chaos Embedded Particle Swarm Optimization", journal = j-TCBB, volume = "10", number = "5", pages = "1299--1309", month = sep, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.63", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jan 9 15:34:03 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Operons contain valuable information for drug design and determining protein functions. Genes within an operon are co-transcribed to a single-strand mRNA and must be coregulated. 
The identification of operons is, thus, critical for a detailed understanding of the gene regulations. However, currently used experimental methods for operon detection are generally difficult to implement and time consuming. In this paper, we propose a chaotic binary particle swarm optimization (CBPSO) to predict operons in bacterial genomes. The intergenic distance, participation in the same metabolic pathway and the cluster of orthologous groups (COG) properties of the Escherichia coli genome are used to design a fitness function. Furthermore, the Bacillus subtilis, Pseudomonas aeruginosa PA01, Staphylococcus aureus and Mycobacterium tuberculosis genomes are tested and evaluated for accuracy, sensitivity, and specificity. The computational results indicate that the proposed method works effectively in terms of enhancing the performance of the operon prediction. The proposed method also achieved a good balance between sensitivity and specificity when compared to methods from the literature.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zarai:2013:EES, author = "Yoram Zarai and Michael Margaliot and Tamir Tuller", title = "Explicit Expression for the Steady-State Translation Rate in the Infinite-Dimensional Homogeneous Ribosome Flow Model", journal = j-TCBB, volume = "10", number = "5", pages = "1322--1328", month = sep, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.120", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jan 9 15:34:03 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Gene translation is a central stage in the intracellular process of protein synthesis. Gene translation proceeds in three major stages: initiation, elongation, and termination. 
During the elongation step, ribosomes (intracellular macromolecules) link amino acids together in the order specified by messenger RNA (mRNA) molecules. The homogeneous ribosome flow model (HRFM) is a mathematical model of translation-elongation under the assumption of constant elongation rate along the mRNA sequence. The HRFM includes $ (n) $ first-order nonlinear ordinary differential equations, where $ (n) $ represents the length of the mRNA sequence, and two positive parameters: ribosomal initiation rate and the (constant) elongation rate. Here, we analyze the HRFM when $ (n) $ goes to infinity and derive a simple expression for the steady-state protein synthesis rate. We also derive bounds that show that the behavior of the HRFM for finite, and relatively small, values of $ (n) $ is already in good agreement with the closed-form result in the infinite-dimensional case. For example, for $ (n = 15) $, the relative error is already less than 4 percent. Our results can, thus, be used in practice for analyzing the behavior of finite-dimensional HRFMs that model translation. To demonstrate this, we apply our approach to estimate the mean initiation rate in M. musculus, finding it to be around 0.17 codons per second.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Bach:2013:SND, author = "Christian Bach and Prabir Patra and Jani M. 
Pallis and William Sherman and Hassan Bajwa", title = "Strategy for Naturelike Designer Transcription Factors with Reduced Toxicity", journal = j-TCBB, volume = "10", number = "5", pages = "1340--1343", month = sep, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.107", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jan 9 15:34:03 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "For clinical applications, the biological functions of DNA-binding proteins require that they interact with their target binding site with high affinity and specificity. Advances in randomized production and target-oriented selection of engineered artificial DNA-binding domains incited a rapidly expanding field of designer transcription factors (TFs). Engineered transcription factors are used in zinc-finger nuclease (ZFN) technology that allows targeted genome editing. Zinc-finger-binding domains fabricated by modular assembly display an unexpectedly high failure rate having either a lack of activity as ZFNs in human cells or activity at ``off-target'' binding sites on the human genome causing cell death. To address these shortcomings, we created new binding domains using a targeted modification strategy. We produced two SP1 mutants by exchanging amino acid residues in the alpha-helical region of the transcription factor SP1. We identified their best target binding sites and searched the NCBI HuRef genome for matches of the nine-base-pair consensus binding site of SP1 and the best binding sites of its mutants.
Our research concludes that we can alter the binding preference of existing zinc-finger domains without altering its biological functionalities.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Anonymous:2013:IOA, author = "Anonymous", title = "{IEEE} Open Access Publishing", journal = j-TCBB, volume = "10", number = "5", pages = "1344--1344", month = sep, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.183", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Jan 9 15:34:03 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Mandoiu:2013:GEI, author = "Ion I. Mandoiu and Jianxin Wang and Alexander Zelikovsky", title = "{Guest Editors}' Introduction to the Special Section on Bioinformatics Research and Applications", journal = j-TCBB, volume = "10", number = "6", pages = "1345--1346", month = nov, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.189", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Feb 28 05:26:07 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Chen:2013:RTR, author = "Xi Chen and Jianhua Xuan and Chen Wang and Ayesha N. Shajahan and Rebecca B. 
Riggins and Robert Clarke", title = "Reconstruction of Transcriptional Regulatory Networks by Stability-Based Network Component Analysis", journal = j-TCBB, volume = "10", number = "6", pages = "1347--1358", month = nov, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.146", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Feb 28 05:26:07 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Reliable inference of transcription regulatory networks is a challenging task in computational biology. Network component analysis (NCA) has become a powerful scheme to uncover regulatory networks behind complex biological processes. However, the performance of NCA is impaired by the high rate of false connections in binding information. In this paper, we integrate stability analysis with NCA to form a novel scheme, namely stability-based NCA (sNCA), for regulatory network identification. The method mainly addresses the inconsistency between gene expression data and binding motif information. Small perturbations are introduced to prior regulatory network, and the distance among multiple estimated transcript factor (TF) activities is computed to reflect the stability for each TF's binding network. For target gene identification, multivariate regression and t-statistic are used to calculate the significance for each TF-gene connection. Simulation studies are conducted and the experimental results show that sNCA can achieve an improved and robust performance in TF identification as compared to NCA. The approach for target gene identification is also demonstrated to be suitable for identifying true connections between TFs and their target genes. 
Furthermore, we have successfully applied sNCA to breast cancer data to uncover the role of TFs in regulating endocrine resistance in breast cancer.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Re:2013:NBD, author = "Matteo Re and Giorgio Valentini", title = "Network-Based Drug Ranking and Repositioning with Respect to {DrugBank} Therapeutic Categories", journal = j-TCBB, volume = "10", number = "6", pages = "1359--1371", month = nov, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.62", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Feb 28 05:26:07 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Drug repositioning is a challenging computational problem involving the integration of heterogeneous sources of biomolecular data and the design of label ranking algorithms able to exploit the overall topology of the underlying pharmacological network. In this context, we propose a novel semisupervised drug ranking problem: prioritizing drugs in integrated biochemical networks according to specific DrugBank therapeutic categories. Algorithms for drug repositioning usually perform the inference step into an inhomogeneous similarity space induced by the relationships existing between drugs and a second type of entity (e.g., disease, target, ligand set), thus making infeasible a drug ranking within a homogeneous pharmacological space. To deal with this problem, we designed a general framework based on bipartite network projections by which homogeneous pharmacological networks can be constructed and integrated from heterogeneous and complementary sources of chemical, biomolecular and clinical information. 
Moreover, we present a novel algorithmic scheme based on kernelized score functions that adopts both local and global learning strategies to effectively rank drugs in the integrated pharmacological space using different network combination methods. Detailed experiments with more than 80 DrugBank therapeutic categories involving about 1,300 FDA-approved drugs show the effectiveness of the proposed approach.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wylie:2013:PCP, author = "Tim Wylie and Binhai Zhu", title = "Protein Chain Pair Simplification under the Discrete {Fr{\'e}chet} Distance", journal = j-TCBB, volume = "10", number = "6", pages = "1372--1383", month = nov, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.17", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Feb 28 05:26:07 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "For protein structure alignment and comparison, a lot of work has been done using RMSD as the distance measure, which has drawbacks under certain circumstances. Thus, the discrete Fr{\'e}chet distance was recently applied to the problem of protein (backbone) structure alignment and comparison with promising results. For this problem, visualization is also important because protein chain backbones can have as many as 500-600 $ (\alpha) $-carbon atoms, which constitute the vertices in the comparison. Even with an excellent alignment, the similarity of two polygonal chains can be difficult to visualize unless the chains are nearly identical. Thus, the chain pair simplification problem (CPS-3F) was proposed in 2008 to simultaneously simplify both chains with respect to each other under the discrete Fr{\'e}chet distance. 
The complexity of CPS-3F is unknown, so heuristic methods have been developed. Here, we define a variation of CPS-3F, called the constrained CPS-3F problem ($ ({\rm CPS \hbox {-}3F}^+) $ ), and prove that it is polynomially solvable by presenting a dynamic programming solution, which we then prove is a factor-2 approximation for CPS-3F. We then compare the $ ({\rm CPS \hbox {-}3F}^+) $ solutions with previous empirical results, and further demonstrate some of the benefits of the simplified comparisons. Chain pair simplification based on the Hausdorff distance (CPS-2H) is known to be NP-complete, and here we prove that the constrained version ($ ({\rm CPS \hbox {-}2H}^+) $ ) is also NP-complete. Finally, we discuss future work and implications along with a software library implementation, named the Fr{\'e}chet-based Protein Alignment {\&} Comparison Toolkit (FPACT).", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Bulteau:2013:IED, author = "Laurent Bulteau and Minghui Jiang", title = "Inapproximability of $ ((1, 2)) $-Exemplar Distance", journal = j-TCBB, volume = "10", number = "6", pages = "1384--1390", month = nov, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.144", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Feb 28 05:26:07 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Given two genomes possibly with duplicate genes, the exemplar distance problem is that of removing all but one copy of each gene in each genome, so as to minimize the distance between the two reduced genomes according to some measure.
Let $ ((s, t)) $-exemplar distance denote the exemplar distance problem on two genomes $ (G_1) $ and $ (G_2) $, where each gene occurs at most $ (s) $ times in $ (G_1) $ and at most $ (t) $ times in $ (G_2) $. We show that the simplest nontrivial variant of the exemplar distance problem, $ ((1, 2)) $-Exemplar Distance, is already hard to approximate for a wide variety of distance measures, including both popular genome rearrangement measures such as adjacency disruptions, signed reversals, and signed double-cut-and-joins, and classic string edit distance measures such as Levenshtein and Hamming distances.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Catanzaro:2013:IPF, author = "Daniele Catanzaro and Martine Labb{\'e} and Bjarni V. Halld{\'o}rsson", title = "An Integer Programming Formulation of the Parsimonious Loss of Heterozygosity Problem", journal = j-TCBB, volume = "10", number = "6", pages = "1391--1402", month = nov, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.138", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Feb 28 05:26:07 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A loss of heterozygosity (LOH) event occurs when, by the laws of Mendelian inheritance, an individual should be heterozygote at a given site but, due to a deletion polymorphism, is not. Deletions play an important role in human disease and their detection could provide fundamental insights for the development of new diagnostics and treatments. In this paper, we investigate the parsimonious loss of heterozygosity problem (PLOHP), i.e., the problem of partitioning suspected polymorphisms from a set of individuals into a minimum number of deletion areas.
Specifically, we generalize Halld{\'o}rsson et al.'s work by providing a more general formulation of the PLOHP and by showing how one can incorporate different recombination rates and prior knowledge about the locations of deletions. Moreover, we show that the PLOHP can be formulated as a specific version of the clique partition problem in a particular class of graphs called undirected catch-point interval graphs and we prove its general $ ({\cal NP}) $-hardness. Finally, we provide a state-of-the-art integer programming (IP) formulation and strengthening valid inequalities to exactly solve real instances of the PLOHP containing up to 9,000 individuals and 3,000 SNPs. Our results give perspectives on the mathematics of the PLOHP and suggest new directions on the development of future efficient exact solution approaches.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Christinat:2013:TPE, author = "Yann Christinat and Bernard M. E. Moret", title = "A Transcript Perspective on Evolution", journal = j-TCBB, volume = "10", number = "6", pages = "1403--1411", month = nov, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.145", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Feb 28 05:26:07 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Alternative splicing is now recognized as a major mechanism for transcriptome and proteome diversity in higher eukaryotes, yet its evolution is poorly understood. Most studies focus on the evolution of exons and introns at the gene level, while only few consider the evolution of transcripts. In this paper, we present a framework for transcript phylogenies where ancestral transcripts evolve along the gene tree by gains, losses, and mutation. 
We demonstrate the usefulness of our method on a set of 805 genes and two different topics. First, we improve a method for transcriptome reconstruction from ESTs (ASPic), then we study the evolution of function in transcripts. The use of transcript phylogenies allows us to double the precision of ASPic, whereas results on the functional study reveal that conserved transcripts are more likely to share protein domains than functional sites. These studies validate our framework for the study of evolution in large collections of organisms from the perspective of transcripts; for this purpose, we developed and provide a new tool, TrEvoR.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Li:2013:MLI, author = "Si Li and Kwok Pui Choi and Taoyang Wu and Louxin Zhang", title = "Maximum Likelihood Inference of the Evolutionary History of a {PPI} Network from the Duplication History of Its Proteins", journal = j-TCBB, volume = "10", number = "6", pages = "1412--1421", month = nov, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.14", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Feb 28 05:26:07 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Evolutionary history of protein-protein interaction (PPI) networks provides valuable insight into molecular mechanisms of network growth. In this paper, we study how to infer the evolutionary history of a PPI network from its protein duplication relationship. We show that for a plausible evolutionary history of a PPI network, its relative quality, measured by the so-called loss number, is independent of the growth parameters of the network and can be computed efficiently. 
This finding leads us to propose two fast maximum likelihood algorithms to infer the evolutionary history of a PPI network given the duplication history of its proteins. Simulation studies demonstrated that our approach, which takes advantage of protein duplication information, outperforms NetArch, the first maximum likelihood algorithm for PPI network history reconstruction. Using the proposed method, we studied the topological change of the PPI networks of the yeast, fruitfly, and worm.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Subramanian:2013:NMS, author = "Ayshwarya Subramanian and Stanley Shackney and Russell Schwartz", title = "Novel Multisample Scheme for Inferring Phylogenetic Markers from Whole Genome Tumor Profiles", journal = j-TCBB, volume = "10", number = "6", pages = "1422--1431", month = nov, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.33", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Feb 28 05:26:07 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Computational cancer phylogenetics seeks to enumerate the temporal sequences of aberrations in tumor evolution, thereby delineating the evolution of possible tumor progression pathways, molecular subtypes, and mechanisms of action. We previously developed a pipeline for constructing phylogenies describing evolution between major recurring cell types computationally inferred from whole-genome tumor profiles. The accuracy and detail of the phylogenies, however, depend on the identification of accurate, high-resolution molecular markers of progression, i.e., reproducible regions of aberration that robustly differentiate different subtypes and stages of progression. 
Here, we present a novel hidden Markov model (HMM) scheme for the problem of inferring such phylogenetically significant markers through joint segmentation and calling of multisample tumor data. Our method classifies sets of genome-wide DNA copy number measurements into a partitioning of samples into normal (diploid) or amplified at each probe. It differs from other similar HMM methods in its design specifically for the needs of tumor phylogenetics, by seeking to identify robust markers of progression conserved across a set of copy number profiles. We show an analysis of our method in comparison to other methods on both synthetic and real tumor data, which confirms its effectiveness for tumor phylogeny inference and suggests avenues for future advances.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wehe:2013:EAK, author = "Andre Wehe and J. Gordon Burleigh and Oliver Eulenstein", title = "Efficient Algorithms for Knowledge-Enhanced Supertree and Supermatrix Phylogenetic Problems", journal = j-TCBB, volume = "10", number = "6", pages = "1432--1441", month = nov, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2012.162", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Feb 28 05:26:07 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Phylogenetic inference is a computationally difficult problem, and constructing high-quality phylogenies that can build upon existing phylogenetic knowledge and synthesize insights from new data remains a major challenge. We introduce knowledge-enhanced phylogenetic problems for both supertree and supermatrix phylogenetic analyses. These problems seek an optimal phylogenetic tree that can only be assembled from a user-supplied set of, possibly incompatible, phylogenetic relationships. 
We describe exact polynomial time algorithms for the knowledge-enhanced versions of the NP-hard Robinson Foulds, gene duplication, duplication and loss, and deep coalescence supertree problems. Further, we demonstrate that our algorithms can rapidly improve upon results of local search heuristics for these problems. Finally, we introduce a knowledge-enhanced search heuristic that can be applied to any discrete character data set using the maximum parsimony (MP) phylogenetic problem. Although this approach is not guaranteed to find exact solutions, we show that it also can improve upon solutions from commonly used MP heuristics.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Meng:2013:WAC, author = "Tao Meng and Ahmed T. Soliman and Mei-Ling Shyu and Yimin Yang and Shu-Ching Chen and S. S. Iyengar and John Yordy and Puneeth Iyengar", title = "{Wavelet} Analysis in Current Cancer Genome Research: a Survey", journal = j-TCBB, volume = "10", number = "6", pages = "1442--1459", month = nov, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.134", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Feb 28 05:26:07 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "With the rapid development of next generation sequencing technology, the amount of biological sequence data of the cancer genome increases exponentially, which calls for efficient and effective algorithms that may identify patterns hidden underneath the raw data that may distinguish cancer Achilles' heels. From a signal processing point of view, biological units of information, including DNA and protein sequences, have been viewed as one-dimensional signals. 
Therefore, researchers have been applying signal processing techniques to mine the potentially significant patterns within these sequences. More specifically, in recent years, wavelet transforms have become an important mathematical analysis tool, with a wide and ever increasing range of applications. The versatility of wavelet analytic techniques has forged new interdisciplinary bounds by offering common solutions to apparently diverse problems and providing a new unifying perspective on problems of cancer genome research. In this paper, we provide a survey of how wavelet analysis has been applied to cancer bioinformatics questions. Specifically, we discuss several approaches of representing the biological sequence data numerically and methods of using wavelet analysis on the numerical sequences.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ji:2013:PLS, author = "Shuiwang Ji and Wenlu Zhang and Rongjian Li", title = "A Probabilistic Latent Semantic Analysis Model for Coclustering the Mouse Brain Atlas", journal = j-TCBB, volume = "10", number = "6", pages = "1460--1468", month = nov, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.135", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Feb 28 05:26:07 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The mammalian brain contains cells of a large variety of types. The phenotypic properties of cells of different types are largely the results of distinct gene expression patterns. Therefore, it is of critical importance to characterize the gene expression patterns in the mammalian brain. The Allen Developing Mouse Brain Atlas provides spatiotemporal in situ hybridization gene expression data across multiple stages of mouse brain development. 
It provides a framework to explore spatiotemporal regulation of gene expression during development. We employ a graph approximation formulation to cocluster the genes and the brain voxels simultaneously for each time point. We show that this formulation can be expressed as a probabilistic latent semantic analysis (PLSA) model, thereby allowing us to use the expectation-maximization algorithm for PLSA to estimate the coclustering parameters. To provide a quantitative comparison with prior methods, we evaluate the coclustering method on a set of standard synthetic data sets. Results indicate that our method consistently outperforms prior methods. We apply our method to cocluster the Allen Developing Mouse Brain Atlas data. Results indicate that our clustering of voxels is more consistent with classical neuroanatomy than those of prior methods. Our analysis also yields sets of genes that are co-expressed in a subset of the brain voxels.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhu:2013:EAZ, author = "Daming Zhu and Lusheng Wang", title = "An Exact Algorithm for the Zero Exemplar Breakpoint Distance Problem", journal = j-TCBB, volume = "10", number = "6", pages = "1469--1477", month = nov, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.127", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Feb 28 05:26:07 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The exemplar breakpoint distance problem is one of the most important problems in genome comparison and has been extensively studied in the literature. The exemplar breakpoint distance problem cannot be approximated within any factor even if each gene family occurs at most twice in a genome. 
This is due to the fact that its decision version, the zero exemplar breakpoint distance problem where each gene family occurs at most twice in a genome (ZEBD$ ((2, 2)) $ for short) is NP-hard. Thus, the basic version ZEBD$ ((2, 2)) $ has attracted the attention of many scientists. The best existing algorithm for ZEBD$ ((2, 2)) $ runs in $ (O(n2^n)) $ time. In this paper, we propose a new algorithm for ZEBD$ ((2, 2)) $ with running time $ (O(n^2 1.86121^n)) $. We have implemented the algorithm in Java. The software package is available upon request.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Mourad:2013:DPS, author = "Ramy Mourad and Zaher Dawy and Faruck Morcos", title = "Designing Pooling Systems for Noisy High-Throughput Protein-Protein Interaction Experiments Using {Boolean} Compressed Sensing", journal = j-TCBB, volume = "10", number = "6", pages = "1478--1490", month = nov, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.129", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Feb 28 05:26:07 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Group testing, also known as pooling, is a common technique used in high-throughput experiments in molecular biology to significantly reduce the number of tests required to identify rare biological interactions while correcting for experimental noise. Central to the group testing problem are (1) a pooling design that lays out how items are grouped together into pools for testing and (2) a decoder that interprets the results of the tested pools, identifying the active compounds. 
In this work, we take advantage of decoder guarantees from the field of compressed sensing (CS) to address the problem of efficient and reliable detection of biological interaction in noisy high-throughput experiments. We also use efficient combinatorial algorithms from group testing as well as established measurement matrices from CS to create pooling designs. First, we formulate the group testing problem in terms of a Boolean CS framework. We then propose a low-complexity $ (l_1) $-norm decoder to interpret pooling test results and identify active compounds. We demonstrate the robustness of the proposed $ (l_1) $-norm decoder in simulated experiments with false-positive and false-negative error rates typical of high-throughput experiments. When benchmarked against the current state-of-the-art methods, the proposed $ (l_1) $-norm decoder provides superior error correction for the majority of the cases considered while being notably faster computationally. Additionally, we test the performance of the $ (l_1) $-norm decoder against a real experimental data set, where 12,675 prey proteins were screened against 12 bait proteins. Lastly, we study the impact of different sparse pooling design matrices on decoder performance and show that the shifted transversal design (STD) is the most suitable among the pooling designs surveyed for biological applications of CS.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ahirwal:2013:EEA, author = "M. K. Ahirwal and A. Kumar and G. K. 
Singh", title = "{EEG\slash ERP} Adaptive Noise Canceller Design with {Controlled Search Space (CSS)} Approach in Cuckoo and Other Optimization Algorithms", journal = j-TCBB, volume = "10", number = "6", pages = "1491--1504", month = nov, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.119", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Feb 28 05:26:07 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This paper explores the migration of adaptive filtering with swarm intelligence/evolutionary techniques employed in the field of electroencephalogram/event-related potential noise cancellation or extraction. A new approach is proposed in the form of controlled search space to stabilize the randomness of swarm intelligence techniques especially for the EEG signal. Swarm-based algorithms such as Particles Swarm Optimization, Artificial Bee Colony, and Cuckoo Optimization Algorithm with their variants are implemented to design optimized adaptive noise canceler. The proposed controlled search space technique is tested on each of the swarm intelligence techniques and is found to be more accurate and powerful. Adaptive noise canceler with traditional algorithms such as least-mean-square, normalized least-mean-square, and recursive least-mean-square algorithms are also implemented to compare the results. ERP signals such as simulated visual evoked potential, real visual evoked potential, and real sensorimotor evoked potential are used, due to their physiological importance in various EEG studies. Average computational time and shape measures of evolutionary techniques are observed 8.21E-01 sec and 1.73E-01, respectively. 
Though, traditional algorithms take negligible time consumption, but are unable to offer good shape preservation of ERP, noticed as average computational time and shape measure difference, 1.41E-02 sec and 2.60E+00, respectively.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Kaddi:2013:MHS, author = "Chanchala D. Kaddi and R. Mitchell Parry and May D. Wang", title = "Multivariate Hypergeometric Similarity Measure", journal = j-TCBB, volume = "10", number = "6", pages = "1505--1516", month = nov, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.28", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Feb 28 05:26:07 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We propose a similarity measure based on the multivariate hypergeometric distribution for the pairwise comparison of images and data vectors. The formulation and performance of the proposed measure are compared with other similarity measures using synthetic data. A method of piecewise approximation is also implemented to facilitate application of the proposed measure to large samples. Example applications of the proposed similarity measure are presented using mass spectrometry imaging data and gene expression microarray data. Results from synthetic and biological data indicate that the proposed measure is capable of providing meaningful discrimination between samples, and that it can be a useful tool for identifying potentially related samples in large-scale biological data sets.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wong:2013:PPL, author = "Ginny Y. Wong and Frank H. F. 
Leung and Sai-Ho Ling", title = "Predicting Protein-Ligand Binding Site Using Support Vector Machine with Protein Properties", journal = j-TCBB, volume = "10", number = "6", pages = "1517--1529", month = nov, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.126", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Feb 28 05:26:07 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Identification of protein-ligand binding site is an important task in structure-based drug design and docking algorithms. In the past two decades, different approaches have been developed to predict the binding site, such as the geometric, energetic, and sequence-based methods. When scores are calculated from these methods, the algorithm for doing classification becomes very important and can affect the prediction results greatly. In this paper, the support vector machine (SVM) is used to cluster the pockets that are most likely to bind ligands with the attributes of geometric characteristics, interaction potential, offset from protein, conservation score, and properties surrounding the pockets. Our approach is compared to LIGSITE, $ ({\rm LIGSITE}^{{\rm csc}}) $, SURFNET, Fpocket, PocketFinder, Q-SiteFinder, ConCavity, and MetaPocket on the data set LigASite and 198 drug-target protein complexes. The results show that our approach improves the success rate from 60 to 80 percent at AUC measure and from 61 to 66 percent at top 1 prediction. Our method also provides more comprehensive results than the others.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Braz:2013:PMC, author = "Fernando A. F. Braz and Jader S. Cruz and Alessandra C. Faria-Campos and Sergio V. A. 
Campos", title = "Probabilistic Model Checking Analysis of Palytoxin Effects on Cell Energy Reactions of the {Na+\slash K+-ATPase}", journal = j-TCBB, volume = "10", number = "6", pages = "1530--1541", month = nov, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.97", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Feb 28 05:26:07 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Probabilistic model checking (PMC) is a technique used for the specification and analysis of complex systems. It can be applied directly to biological systems which present these characteristics, including cell transport systems. These systems are structures responsible for exchanging ions through the plasma membrane. Their correct behavior is essential for animal cells, since changes on those are responsible for diseases. In this work, PMC is used to model and analyze the effects of the palytoxin toxin (PTX) interactions with one of these systems. Our model suggests that ATP could inhibit PTX action. Therefore, individuals with ATP deficiencies, such as in brain disorders, may be more susceptible to the toxin. We have also used heat maps to enhance the kinetic model, which is used to describe the system reactions. The map reveals unexpected situations, such as a frequent reaction between unlikely pump states, and hot spots such as likely states and reactions. 
This type of analysis provides a better understanding on how transmembrane ionic transport systems behave and may lead to the discovery and development of new drugs to treat diseases associated to their incorrect behavior.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Yang:2013:CPP, author = "Chao Yang and Zengyou He and Weichuan Yu", title = "A Combinatorial Perspective of the Protein Inference Problem", journal = j-TCBB, volume = "10", number = "6", pages = "1542--1547", month = nov, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.110", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Feb 28 05:26:07 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In a shotgun proteomics experiment, proteins are the most biologically meaningful output. The success of proteomics studies depends on the ability to accurately and efficiently identify proteins. Many methods have been proposed to facilitate the identification of proteins from peptide identification results. However, the relationship between protein identification and peptide identification has not been thoroughly explained before. In this paper, we devote ourselves to a combinatorial perspective of the protein inference problem. We employ combinatorial mathematics to calculate the conditional protein probabilities (protein probability means the probability that a protein is correctly identified) under three assumptions, which lead to a lower bound, an upper bound, and an empirical estimation of protein probabilities, respectively. The combinatorial perspective enables us to obtain an analytical expression for protein inference. 
Our method achieves comparable results with ProteinProphet in a more efficient manner in experiments on two data sets of standard protein mixtures and two data sets of real samples. Based on our model, we study the impact of unique peptides and degenerate peptides (degenerate peptides are peptides shared by at least two proteins) on protein probabilities. Meanwhile, we also study the relationship between our model and ProteinProphet. We name our program ProteinInfer. Its Java source code, our supplementary document and experimental results are available at http://bioinformatics.ust.hk/proteininfer.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Oakley:2013:PSO, author = "Mark T. Oakley and Elizabeth Grace Richardson and Harriet Carr and Roy L. Johnston", title = "Protein Structure Optimization with a ``{Lamarckian}'' Ant Colony Algorithm", journal = j-TCBB, volume = "10", number = "6", pages = "1548--1552", month = nov, year = "2013", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.125", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Feb 28 05:26:07 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We describe the LamarckiAnt algorithm: a search algorithm that combines the features of a ``Lamarckian'' genetic algorithm and ant colony optimization. We have implemented this algorithm for the optimization of BLN model proteins, which have frustrated energy landscapes and represent a challenge for global optimization algorithms. 
We demonstrate that LamarckiAnt performs competitively with other state-of-the-art optimization algorithms.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Xu:2014:EEC, author = "Ying Xu", title = "Editorial from the {Editor-in-Chief}", journal = j-TCBB, volume = "11", number = "1", pages = "1--1", month = jan, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2302365", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:12 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Eisenhaber:2014:GEI, author = "Frank Eisenhaber and Wing-Kin Sung and Limsoon Wong", title = "Guest editorial for the international conference on genome informatics {(GIW 2013)}", journal = j-TCBB, volume = "11", number = "1", pages = "5--6", month = jan, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2299751", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:12 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhao:2014:CGE, author = "Liang Zhao and Steven C. H. 
Hoi and Zhenhua Li and Limsoon Wong and Hung Nguyen and Jinyan Li", title = "Coupling graphs, efficient algorithms and {B}-cell epitope prediction", journal = j-TCBB, volume = "11", number = "1", pages = "7--16", month = jan, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.136", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:12 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Coupling graphs are newly introduced in this paper to meet many application needs particularly in the field of bioinformatics. A coupling graph is a two-layer graph complex, in which each node from one layer of the graph complex has at least one connection with the nodes in the other layer, and vice versa. The coupling graph model is sufficiently powerful to capture strong and inherent associations between subgraph pairs in complicated applications. The focus of this paper is on mining algorithms of frequent coupling subgraphs and bioinformatics application. Although existing frequent subgraph mining algorithms are competent to identify frequent subgraphs from a graph database, they perform poorly on frequent coupling subgraph mining because they generate many irrelevant subgraphs. We propose a novel graph transformation technique to transform a coupling graph into a generic graph. Based on the transformed coupling graphs, existing graph mining methods are then utilized to discover frequent coupling subgraphs. We prove that the transformation is precise and complete and that the restoration is reversible. Experiments carried out on a database containing 10,511 coupling graphs show that our proposed algorithm reduces the mining time very much in comparison with the existing subgraph mining algorithms. 
Moreover, we demonstrate the usefulness of frequent coupling subgraphs by applying our algorithm to make accurate predictions of epitopes in antibody-antigen binding.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Fan:2014:QSM, author = "Ying Fan and Ruoshui Lu and Lusheng Wang and Massimo Andreatta and Shuai Cheng Li", title = "Quantifying significance of {MHC II} residues", journal = j-TCBB, volume = "11", number = "1", pages = "17--25", month = jan, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.138", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:12 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The major histocompatibility complex (MHC), a cell-surface protein mediating immune recognition, plays important roles in the immune response system of all higher vertebrates. MHC molecules are highly polymorphic and they are grouped into serotypes according to the specificity of the response. It is a common belief that a protein sequence determines its three dimensional structure and function. Hence, the protein sequence determines the serotype. Residues play different levels of importance. In this paper, we quantify the residue significance with the available serotype information. Knowing the significance of the residues will deepen our understanding of the MHC molecules and yield us a concise representation of the molecules. In this paper we propose a linear programming-based approach to find significant residue positions as well as quantifying their significance in MHC II DR molecules. 
Among all the residues in MHC II DR molecules, 18 positions are of particular significance, which is consistent with the literature on MHC binding sites, and succinct pseudo-sequences appear to be adequate to capture the whole sequence features. When the result is used for classification of MHC molecules with serotype assigned by WHO, a 98.4 percent prediction performance is achieved. The methods have been implemented in java (http://code.google.com/p/quassi/).", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Yoo:2014:ICM, author = "Paul D. Yoo and Sami Muhaidat and Kamal Taha and Jamal Bentahar and Abdallah Shami", title = "Intelligent consensus modeling for proline cis-trans isomerization prediction", journal = j-TCBB, volume = "11", number = "1", pages = "26--32", month = jan, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.132", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:12 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Proline cis-trans isomerization (CTI) plays a key role in the rate-determining steps of protein folding. Accurate prediction of proline CTI is of great importance for the understanding of protein folding, splicing, cell signaling, and transmembrane active transport in both the human body and animals. Our goal is to develop a state-of-the-art proline CTI predictor based on a biophysically motivated intelligent consensus modeling through the use of sequence information only (i.e., position specific scores generated by PSI-BLAST). 
The current computational proline CTI predictors reach about 70-73 percent Q2 accuracies and about 0.40 Matthew correlation coefficient (Mcc) through the use of sequence-based evolutionary information as well as predicted protein secondary structure information. However, our approach that utilizes a novel decision tree-based consensus model with a powerful randomized-metalearning technique has achieved 86.58 percent Q2 accuracy and 0.74 Mcc, on the same proline CTI data set, which is a better result than those of any existing computational proline CTI predictors reported in the literature.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ghoraie:2014:RSS, author = "Laleh Soltan Ghoraie and Forbes Burkowski and Shuai Cheng Li and Mu Zhu", title = "Residue-specific side-chain polymorphisms via particle belief propagation", journal = j-TCBB, volume = "11", number = "1", pages = "33--41", month = jan, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.130", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:12 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Protein side chains populate diverse conformational ensembles in crystals. Despite much evidence that there is widespread conformational polymorphism in protein side chains, most of the X-ray crystallography data are modeled by single conformations in the Protein Data Bank. The ability to extract or to predict these conformational polymorphisms is of crucial importance, as it facilitates deeper understanding of protein dynamics and functionality. In this paper, we describe a computational strategy capable of predicting side-chain polymorphisms. 
Our approach extends a particular class of algorithms for side-chain prediction by modeling the side-chain dihedral angles more appropriately as continuous rather than discrete variables. Employing a new inferential technique known as particle belief propagation, we predict residue-specific distributions that encode information about side-chain polymorphisms. Our predicted polymorphisms are in relatively close agreement with results from a state-of-the-art approach based on X-ray crystallography data, which characterizes the conformational polymorphisms of side chains using electron density information, and has successfully discovered previously unmodeled conformations.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Liao:2014:NUB, author = "Ruiqi Liao and Ruichang Zhang and Jihong Guan and Shuigeng Zhou", title = "A new unsupervised binning approach for metagenomic sequences based on {$N$}-grams and automatic feature weighting", journal = j-TCBB, volume = "11", number = "1", pages = "42--54", month = jan, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.137", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:12 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The rapid development of high-throughput technologies enables researchers to sequence the whole metagenome of a microbial community sampled directly from the environment. The assignment of these sequence reads into different species or taxonomical classes is a crucial step for metagenomic analysis, which is referred to as binning of metagenomic data. 
Most traditional binning methods rely on known reference genomes for accurate assignment of the sequence reads, therefore cannot classify reads from unknown species without the help of close references. To overcome this drawback, unsupervised learning based approaches have been proposed, which need not any known species' reference genome for help. In this paper, we introduce a novel unsupervised method called MCluster for binning metagenomic sequences. This method uses N-grams to extract sequence features and utilizes automatic feature weighting to improve the performance of the basic K-means clustering algorithm. We evaluate MCluster on a variety of simulated data sets and a real data set, and compare it with three latest binning methods: AbundanceBin, MetaCluster 3.0, and MetaCluster 5.0. Experimental results show that MCluster achieves obviously better overall performance (F-measure) than AbundanceBin and MetaCluster 3.0 on long metagenomic reads ({$>$}=800 bp); while compared with MetaCluster 5.0, MCluster obtains a larger sensitivity, and a comparable yet more stable F-measure on short metagenomic reads ({$<$300} bp). 
This suggests that MCluster can serve as a promising tool for effectively binning metagenomic sequences.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Hsiao:2014:GND, author = "Jui-Chen Hsiao and Chih-Hsuan Wei and Hung-Yu Kao", title = "Gene name disambiguation using multi-scope species detection", journal = j-TCBB, volume = "11", number = "1", pages = "55--62", month = jan, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.139", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:12 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Species detection is an important topic in the text mining field. According to the importance of the research topics (e.g., species assignment to genes and document focus species detection), some studies are dedicated to an individual topic. However, no researcher to date has discussed species detection as a general problem. Therefore, we developed a multi-scope species detection model to identify the focus species for different scopes (i.e., gene mention, sentence, paragraph, and global scope of the entire article). Species assignment is one of the bottlenecks of gene name disambiguation. In our evaluation, recognizing the focus species of a gene mention in four different scopes improved the gene name disambiguation. We used the species cue words extracted from articles to estimate the relevance between an article and a species. The relevance score was calculated by our proposed entities frequency-augmented invert species frequency (EF-AISF) formula, which represents the importance of an entity to a species. We also defined a relation guide factor (RGF) to normalize the relevance score. 
Our method not only achieved better performance than previous methods but also can handle the articles that do not specifically mention a species. In the DECA corpus, we outperformed previous studies and obtained an accuracy of 88.22 percent.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Guo:2014:RFE, author = "Jing Guo and Ritika Jain and Peng Yang and Rui Fan and Chee Keong Kwoh and Jie Zheng", title = "Reliable and fast estimation of recombination rates by convergence diagnosis and parallel {Markov Chain Monte Carlo}", journal = j-TCBB, volume = "11", number = "1", pages = "63--72", month = jan, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.133", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:12 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Genetic recombination is an essential event during the process of meiosis resulting in an exchange of segments between paired chromosomes. Estimating recombination rate is crucial for understanding the process of recombination. Experimental methods are normally difficult and limited to small scale estimations. Thus statistical methods using population genetics data are important for large-scale analysis. LDhat is an extensively used statistical method using rjMCMC algorithm to predict recombination rates. Due to the complexity of rjMCMC scheme, LDhat may take a long time for large SNP data sets. In addition, rjMCMC parameters should be manually defined in the original program which directly impact results. To address these issues, we designed an improved algorithm based on LDhat implementing MCMC convergence diagnostic algorithms to automatically predict values of parameters and monitor the mixing process. 
Then parallel computation methods were employed to further accelerate the new program. The new algorithms have been tested on ten samples from HapMap phase 2 data set. The results were compared with previous code and showed nearly identical output. However, our new methods achieved significant acceleration proving that they are more efficient and reliable for the estimation of recombination rates. The stand-alone package is freely available for download http://www.ntu.edu.sg/home/zhengjie/software/CPLDhat.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Gan:2014:ICR, author = "Yanglan Gan and Jihong Guan and Shuigeng Zhou and Weixiong Zhang", title = "Identifying cis-regulatory elements and modules using conditional random fields", journal = j-TCBB, volume = "11", number = "1", pages = "73--82", month = jan, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.131", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:12 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Accurate identification of cis-regulatory elements and their correlated modules is essential for analysis of transcriptional regulation, which is a challenging problem in computational biology. Unsupervised learning has the advantage of compensating for missing annotated data, and is thus promising to be effective to identify cis-regulatory elements and modules. We introduced a Conditional Random Fields model, referred to as CRFEM, to integrate sequence features and long-range dependency of genomic sequences such as epigenetic features to identify cis-regulatory elements and modules at the same time. 
The proposed method is able to automatically learn model parameters with no labeled data and explicitly optimize the predictive probability of cis-regulatory elements and modules. In comparison with existing methods, our method is more accurate and can be used for genome-wide studies of gene regulation.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Srihari:2014:ECC, author = "Sriganesh Srihari and Venkatesh Raman and Hon Wai Leong and Mark A. Ragan", title = "Evolution and controllability of cancer networks: a {Boolean} perspective", journal = j-TCBB, volume = "11", number = "1", pages = "83--94", month = jan, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.128", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:12 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Cancer forms a robust system capable of maintaining stable functioning (cell sustenance and proliferation) despite perturbations. Cancer progresses as stages over time typically with increasing aggressiveness and worsening prognosis. Characterizing these stages and identifying the genes driving transitions between them is critical to understand cancer progression and to develop effective anti-cancer therapies. In this work, we propose a novel model for the 'cancer system' as a Boolean state space in which a Boolean network, built from protein-interaction and gene-expression data from different stages of cancer, transits between Boolean satisfiability states by ``editing'' interactions and ``flipping'' genes. Edits reflect rewiring of the PPI network while flipping of genes reflect activation or silencing of genes between stages. We formulate a minimization problem MIN FLIP to identify these genes driving the transitions. 
The application of our model (called BoolSpace) on three case studies--pancreatic and breast tumours in human and post spinal-cord injury (SCI) in rats--reveals valuable insights into the phenomenon of cancer progression: (i) interactions involved in core cell-cycle and DNA-damage repair pathways are significantly rewired in tumours, indicating significant impact to key genome-stabilizing mechanisms; (ii) several of the genes flipped are serine/threonine kinases which act as biological switches, reflecting cellular switching mechanisms between stages; and (iii) different sets of genes are flipped during the initial and final stages indicating a pattern to tumour progression. Based on these results, we hypothesize that robustness of cancer partly stems from ``passing of the baton'' between genes at different stages--genes from different biological processes and/or cellular components are involved in different stages of tumour progression thereby allowing tumour cells to evade targeted therapy, and therefore an effective therapy should target a ``cover set'' of these genes. 
A C/C++ implementation of BoolSpace is freely available at: http://www.bioinformatics.org.au/tools-data.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Bandyopadhyay:2014:SCS, author = "Sanghamitra Bandyopadhyay and Saurav Mallik and Anirban Mukhopadhyay", title = "A survey and comparative study of statistical tests for identifying differential expression from microarray data", journal = j-TCBB, volume = "11", number = "1", pages = "95--115", month = jan, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.147", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:12 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "DNA microarray is a powerful technology that can simultaneously determine the levels of thousands of transcripts (generated, for example, from genes/miRNAs) across different experimental conditions or tissue samples. The motto of differential expression analysis is to identify the transcripts whose expressions change significantly across different types of samples or experimental conditions. A number of statistical testing methods are available for this purpose. In this paper, we provide a comprehensive survey on different parametric and non-parametric testing methodologies for identifying differential expression from microarray data sets. The performances of the different testing methods have been compared based on some real-life miRNA and mRNA expression data sets. For validating the resulting differentially expressed miRNAs, the outcomes of each test are checked with the information available for miRNA in the standard miRNA database PhenomiR 2.0. 
Subsequently, we have prepared different simulated data sets of different sample sizes (from 10 to 100 per group/population) and thereafter the power of each test have been calculated individually. The comparative simulated study might lead to formulate robust and comprehensive judgements about the performance of each test in the basis of assumption of data distribution. Finally, a list of advantages and limitations of the different statistical tests has been provided, along with indications of some areas where further studies are required.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Bandyopadhyay:2014:NPB, author = "Sanghamitra Bandyopadhyay and Koushik Mallick", title = "A new path based hybrid measure for gene ontology similarity", journal = j-TCBB, volume = "11", number = "1", pages = "116--127", month = jan, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.149", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:12 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Gene Ontology (GO) consists of a controlled vocabulary of terms, annotating a gene or gene product, structured in a directed acyclic graph. In the graph, semantic relations connect the terms, that represent the knowledge of functional description and cellular component information of gene products. GO similarity gives us a numerical representation of biological relationship between a gene set, which can be used to infer various biological facts such as protein interaction, structural similarity, gene clustering, etc. Here we introduce a new shortest path based hybrid measure of ontological similarity between two terms which combines both structure of the GO graph and information content of the terms. 
Here the similarity between two terms $ t_1 $ and $ t_2 $, referred to as GOSimPBHM($ t_1 $, $ t_2$), has two components; one obtained from the common ancestors of $ t_1$ and $ t_2$. The other from their remaining ancestors. The proposed path based hybrid measure does not suffer from the well-known shallow annotation problem. Its superiority with respect to some other popular measures is established for protein protein interaction prediction, correlation with gene expression and functional classification of genes in a biological pathway. Finally, the proposed measure is utilized to compute the average GO similarity score among the genes that are experimentally validated targets of some microRNAs. Results demonstrate that the targets of a given miRNA have a high degree of similarity in the biological process category of GO.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Saeed:2014:CRC, author = "Fahad Saeed and Jason D. Hoffert and Mark A. Knepper", title = "{CAMS--RS}: clustering algorithm for large-scale mass spectrometry data using restricted search space and intelligent random sampling", journal = j-TCBB, volume = "11", number = "1", pages = "128--141", month = jan, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.152", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:12 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "High-throughput mass spectrometers can produce massive amounts of redundant data at an astonishing rate with many of them having poor signal-to-noise (S/N) ratio. These low S/N ratio spectra may not get interpreted using conventional spectra-to-database matching techniques. 
In this paper, we present an efficient algorithm, CAMS-RS (Clustering Algorithm for Mass Spectra using Restricted Space and Sampling) for clustering of raw mass spectrometry data. CAMS-RS utilizes a novel metric (called F-set) that exploits the temporal and spatial patterns to accurately assess similarity between two given spectra. The F-set similarity metric is independent of the retention time and allows clustering of mass spectrometry data from independent LC-MS/MS runs. A novel restricted search space strategy is devised to limit the comparisons of the number of spectra. An intelligent sampling method is executed on individual bins that allow merging of the results to make the final clusters. Our experiments, using experimentally generated data sets, show that the proposed algorithm is able to cluster spectra with high accuracy and is helpful in interpreting low S/N ratio spectra. The CAMS-RS algorithm is highly scalable with increasing number of spectra and our implementation allows clustering of up to a million spectra within minutes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Metsis:2014:DCN, author = "Vangelis Metsis and Fillia Makedon and Dinggang Shen and Heng Huang", title = "{DNA} copy number selection using robust structured sparsity-inducing norms", journal = j-TCBB, volume = "11", number = "1", pages = "168--181", month = jan, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.141", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:12 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Array comparative genomic hybridization (aCGH) is a newly introduced method for the detection of copy number abnormalities associated with human diseases with special focus on cancer. 
Specific patterns in DNA copy number variations (CNVs) can be associated with certain disease types and can facilitate prognosis and progress monitoring of the disease. Machine learning techniques have been used to model the problem of tissue typing as a classification problem. Feature selection is an important part of the classification process, because many biological features are not related to the diseases and confuse the classification tasks. Multiple feature selection methods have been proposed in the different domains where classification has been applied. In this work, we will present a new feature selection method based on structured sparsity-inducing norms to identify the informative aCGH biomarkers which can help us classify different disease subtypes. To validate the performance of the proposed method, we experimentally compare it with existing feature selection methods on four publicly available aCGH data sets. In all empirical results, the proposed sparse learning based feature selection method consistently outperforms other related approaches. 
More important, we carefully investigate the aCGH biomarkers selected by our method, and the biological evidences in literature strongly support our results.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wang:2014:CGT, author = "Biing-Feng Wang and Chien-Hsin Lin and I-Tse Yang", title = "Constructing a gene team tree in almost {$ O (n \lg n) $} time", journal = j-TCBB, volume = "11", number = "1", pages = "142--153", month = jan, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.150", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:12 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "An important model of a conserved gene cluster is called the gene team model, in which a chromosome is defined to be a permutation of distinct genes and a gene team is defined to be a set of genes that appear in two or more species, with the distance between adjacent genes in the team for each chromosome always no more than a certain threshold $ \delta $. A gene team tree is a succinct way to represent all gene teams for every possible value of $ \delta $. The previous fastest algorithm for constructing a gene team tree of two chromosomes requires $ O (n \lg n \lg \lg n) $ time, which was given by Wang and Lin. Its bottleneck is a problem called the maximum-gap problem. In this paper, by presenting an improved algorithm for the maximum-gap problem, we reduce the upper bound of the gene team tree problem to $ O (n \lg n \alpha (n)) $. Since $ \alpha $ grows extremely slowly, this result is almost as efficient as the current best upper bound, $ O (n \lg n) $, for finding the gene teams of a fixed $ \delta $ value. Our new algorithm is very efficient from both the theoretical and practical points of view. 
Wang and Lin's gene-team-tree algorithm can be extended to $k$ chromosomes with complexity $ O (k n \lg n \lg \lg n)$. Similarly, our improved algorithm for the maximum-gap problem reduces this running time to $ O (k n \lg n \alpha (n))$. In addition, it also provides new upper bounds for the gene team tree problem on general sequences, in which multiple copies of the same gene are allowed.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Kayano:2014:DDC, author = "Mitsunori Kayano and Motoki Shiga and Hiroshi Mamitsuka", title = "Detecting differentially coexpressed genes from labeled expression data: a brief review", journal = j-TCBB, volume = "11", number = "1", pages = "154--167", month = jan, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.2297921", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:12 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We review methods for capturing differential coexpression, which can be divided into two cases by the size of gene sets: (1) two paired genes and (2) multiple genes. In the first case, two genes are positively and negatively correlated with each other under one and the other conditions, respectively. In the second case, multiple genes are coexpressed and randomly expressed under one and the other conditions, respectively. We summarize a variety of methods for the first and second cases into four and three approaches, respectively. We describe each of these approaches in detail technically, being followed by thorough comparative experiments with both synthetic and real data sets. 
Our experimental results imply high possibility of improving the efficiency of the current methods, particularly in the case of multiple genes, because of low performance achieved by the best methods which are relatively simple intuitive ones.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Prabhakaran:2014:HHI, author = "Sandhya Prabhakaran and M{\'e}lanie Rey and Osvaldo Zagordi and Niko Beerenwinkel and Volker Roth", title = "{HIV} haplotype inference using a propagating {Dirichlet} process mixture model", journal = j-TCBB, volume = "11", number = "1", pages = "182--191", month = jan, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.145", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:12 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This paper presents a new computational technique for the identification of HIV haplotypes. HIV tends to generate many potentially drug-resistant mutants within the HIV-infected patient and being able to identify these different mutants is important for efficient drug administration. With the view of identifying the mutants, we aim at analyzing short deep sequencing data called reads. From a statistical perspective, the analysis of such data can be regarded as a nonstandard clustering problem due to missing pairwise similarity measures between non-overlapping reads. To overcome this problem we propagate a Dirichlet Process Mixture Model by sequentially updating the prior information from successive local analyses. 
The model is verified using both simulated and real sequencing data.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wei:2014:IPI, author = "Leyi Wei and Minghong Liao and Yue Gao and Rongrong Ji and Zengyou He and Quan Zou", title = "Improved and promising identification of human {MicroRNAs} by incorporating a high-quality negative set", journal = j-TCBB, volume = "11", number = "1", pages = "192--201", month = jan, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.146", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:12 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "MicroRNA (miRNA) plays an important role as a regulator in biological processes. Identification of (pre-) miRNAs helps in understanding regulatory processes. Machine learning methods have been designed for pre-miRNA identification. However, most of them cannot provide reliable predictive performances on independent testing data sets. We assumed this is because the training sets, especially the negative training sets, are not sufficiently representative. To generate a representative negative set, we proposed a novel negative sample selection technique, and successfully collected negative samples with improved quality. Two recent classifiers rebuilt with the proposed negative set achieved an improvement of $ \approx 6 $ percent in their predictive performance, which confirmed this assumption. Based on the proposed negative set, we constructed a training set, and developed an online system called miRNApre specifically for human pre-miRNA identification. We showed that miRNApre achieved accuracies on updated human and nonhuman data sets that were 34.3 and 7.6 percent higher than those achieved by current methods. 
The results suggest that miRNApre is an effective tool for pre-miRNA identification. Additionally, by integrating miRNApre, we developed a miRNA mining tool, mirnaDetect, which can be applied to find potential miRNAs in genome-scale data. MirnaDetect achieved a comparable mining performance on human chromosome 19 data as other existing methods.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Esfahani:2014:IBP, author = "Mohammad Shahrokh Esfahani and Edward R. Dougherty", title = "Incorporation of biological pathway knowledge in the construction of priors for optimal {Bayesian} classification", journal = j-TCBB, volume = "11", number = "1", pages = "202--218", month = jan, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.143", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:12 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Small samples are commonplace in genomic/proteomic classification, the result being inadequate classifier design and poor error estimation. The problem has recently been addressed by utilizing prior knowledge in the form of a prior distribution on an uncertainty class of feature-label distributions. A critical issue remains: how to incorporate biological knowledge into the prior distribution. For genomics/proteomics, the most common kind of knowledge is in the form of signaling pathways. Thus, it behooves us to find methods of transforming pathway knowledge into knowledge of the feature-label distribution governing the classification problem. In this paper, we address the problem of prior probability construction by proposing a series of optimization paradigms that utilize the incomplete prior information contained in pathways (both topological and regulatory). 
The optimization paradigms employ the marginal log-likelihood, established using a small number of feature-label realizations (sample points) regularized with the prior pathway information about the variables. In the special case of a Normal-Wishart prior distribution on the mean and inverse covariance matrix (precision matrix) of a Gaussian distribution, these optimization problems become convex. Companion website: gsp.tamu.edu/Publications/supplementary/shahrokh13a.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Amit:2014:LEP, author = "Mika Amit and Rolf Backofen and Steffen Heyne and Gad M. Landau and Mathias M{\"o}hl and Christina Otto and Sebastian Will", title = "Local exact pattern matching for non-fixed {RNA} structures", journal = j-TCBB, volume = "11", number = "1", pages = "219--230", month = jan, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.2297113", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:12 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Detecting local common sequence-structure regions of RNAs is a biologically important problem. Detecting such regions allows biologists to identify functionally relevant similarities between the inspected molecules. We developed dynamic programming algorithms for finding common structure-sequence patterns between two RNAs. The RNAs are given by their sequence and a set of potential base pairs with associated probabilities. In contrast to prior work on local pattern matching of RNAs, we support the breaking of arcs. This allows us to add flexibility over matching only fixed structures; potentially matching only a similar subset of specified base pairs. 
We present an $ O(n^3) $ algorithm for local exact pattern matching between two nested RNAs, and an $ O(n^3 \log n) $ algorithm for one nested RNA and one bounded-unlimited RNA. In addition, an algorithm for approximate pattern matching is introduced that for two given nested RNAs and a number $k$, finds the maximal local pattern matching score between the two RNAs with at most $k$ mismatches in $ O(n^3 k^2)$ time. Finally, we present an $ O(n^3)$ algorithm for finding the most similar subforest between two nested RNAs.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Gorecki:2014:MDC, author = "Pawel G{\'o}recki and Oliver Eulenstein", title = "Maximizing deep coalescence cost", journal = j-TCBB, volume = "11", number = "1", pages = "231--242", month = jan, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.144", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:12 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The minimizing deep coalescence (MDC) problem seeks a species tree that reconciles the given gene trees with the minimum number of deep coalescence events, called deep coalescence (DC) cost. To better assess MDC species trees we investigate into a basic mathematical property of the DC cost, called the diameter. Given a gene tree, a species tree, and a leaf labeling function that assigns leaf-genes of the gene tree to a leaf-species in the species tree from which they were sampled, the DC cost describes the discordance between the trees caused by deep coalescence events. The diameter of a gene tree and a species tree is the maximum DC cost across all leaf labelings for these trees. 
We prove fundamental mathematical properties describing precisely these diameters for bijective and general leaf labelings, and present efficient algorithms to compute the diameters and their corresponding leaf labelings. In particular, we describe an optimal, i.e., linear time, algorithm for the bijective case. Finally, in an experimental study we demonstrate that the average diameters between a gene tree and a species tree grow significantly slower than their naive upper bounds, suggesting that our exact bounds can significantly improve on assessing DC costs when using diameters.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Sun:2014:MSA, author = "Jun Sun and Vasile Palade and Xiaojun Wu and Wei Fang", title = "Multiple sequence alignment with hidden {Markov} models learned by random drift particle swarm optimization", journal = j-TCBB, volume = "11", number = "1", pages = "243--257", month = jan, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.148", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:12 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Hidden Markov Models (HMMs) are powerful tools for multiple sequence alignment (MSA), which is known to be an NP-complete and important problem in bioinformatics. Learning HMMs is a difficult task, and many meta-heuristic methods, including particle swarm optimization (PSO), have been used for that. In this paper, a new variant of PSO, called the random drift particle swarm optimization (RDPSO) algorithm, is proposed to be used for HMM learning tasks in MSA problems. 
The proposed RDPSO algorithm, inspired by the free electron model in metal conductors in an external electric field, employs a novel set of evolution equations that can enhance the global search ability of the algorithm. Moreover, in order to further enhance the algorithmic performance of the RDPSO, we incorporate a diversity control method into the algorithm and, thus, propose an RDPSO with diversity-guided search (RDPSO-DGS). The performances of the RDPSO, RDPSO-DGS and other algorithms are tested and compared by learning HMMs for MSA on two well-known benchmark data sets. The experimental results show that the HMMs learned by the RDPSO and RDPSO-DGS are able to generate better alignments for the benchmark data sets than other most commonly used HMM learning methods, such as the Baum-Welch and other PSO algorithms. The performance comparison with well-known MSA programs, such as ClustalW and MAFFT, also shows that the proposed methods have advantages in multiple sequence alignment.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Healy:2014:AKM, author = "John Healy and Desmond Chambers", title = "Approximate $k$-mer matching using fuzzy hash maps", journal = j-TCBB, volume = "11", number = "1", pages = "258--264", month = jan, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2309609", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:12 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/hash.bib; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We present a fuzzy technique for approximate $k$-mer matching that combines the speed of hashing with the sensitivity of dynamic programming.
Our approach exploits the collision detection mechanism used by hash maps, unifying the two phases of ``seed and extend'' into a single operation that executes in close to $ O(1)$ average time.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Yu:2014:EPF, author = "Guoxian Yu and Huzefa Rangwala and Carlotta Domeniconi and Guoji Zhang and Zhiwen Yu", title = "Erratum to {``Protein function prediction using multilabel ensemble classification''}", journal = j-TCBB, volume = "11", number = "1", pages = "265--265", month = jan, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2299736", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:12 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Kim:2014:MLS, author = "Chang-Kug Kim and Jin-A Kim and Ji-Weon Choi and In-Seon Jeong and Yi-Seul Moon and Dong-Suk Park and Young-Joo Seol and Yong-Kab Kim and Yong-Hwan Kim and Yeon-Ki Kim", title = "A multi-layered screening method to identify plant regulatory genes", journal = j-TCBB, volume = "11", number = "2", pages = "293--303", month = mar, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.2296308", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:18 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We used a seven-step process to identify genes involved in glucosinolate biosynthesis and metabolism in the Chinese cabbage (Brassica rapa). We constructed an annotated data set with 34,570 unigenes from B. 
rapa and predicted 11,526 glucosinolate-related candidate genes using expression profiles generated across nine stages of development on a 47k-gene microarray. Using our multi-layered screening method, we screened 392 transcription factors, 843 pathway genes, and 4,162 ortholog genes associated with glucosinolate-related biosynthesis. Finally, we identified five genes by comparison of the pathway-network genes including the transcription-factor genes and the ortholog-ontology genes. The five genes were anchored to the chromosomes of B. rapa to characterize their genetic-map positions, and phylogenetic reconstruction with homologous genes was performed. These anchored genes were verified by reverse-transcription polymerase chain reaction. While the five genes identified by our multi-layered screen require further characterization and validation, our study demonstrates the power of multi-layered screening after initial identification of genes on microarrays.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Nye:2014:ACP, author = "Tom M. W. Nye", title = "An algorithm for constructing principal geodesics in phylogenetic treespace", journal = j-TCBB, volume = "11", number = "2", pages = "304--315", month = mar, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2309599", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:18 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Most phylogenetic analyses result in a sample of trees, but summarizing and visualizing these samples can be challenging. Consensus trees often provide limited information about a sample, and so methods such as consensus networks, clustering and multidimensional scaling have been developed and applied to tree samples. 
This paper describes a stochastic algorithm for constructing a principal geodesic or line through treespace which is analogous to the first principal component in standard principal components analysis. A principal geodesic summarizes the most variable features of a sample of trees, in terms of both tree topology and branch lengths, and it can be visualized as an animation of smoothly changing trees. The algorithm performs a stochastic search through parameter space for a geodesic which minimizes the sum of squared projected distances of the data points. This procedure aims to identify the globally optimal principal geodesic, though convergence to locally optimal geodesics is possible. The methodology is illustrated by constructing principal geodesics for experimental and simulated data sets, demonstrating the insight into samples of trees that can be gained and how the method improves on a previously published approach. A java package called GeoPhytter for constructing and visualizing principal geodesics is freely available from www.ncl.ac.uk/ntmwn/geophytter.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Eshra:2014:OPC, author = "Abeer Eshra and Ayman El-Sayed", title = "An odd parity checker prototype using {DNAzyme} finite state machine", journal = j-TCBB, volume = "11", number = "2", pages = "316--324", month = mar, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.2295803", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:18 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A finite-state machine (FSM) is an abstract mathematical model of computation used to design both computer programs and sequential logic circuits. 
Considered as an abstract model of computation, FSM is weak; it has less computational power than some other models of computation such as the Turing machine. This paper discusses the finite-state automata based on Deoxyribonucleic Acid (DNA) and different implementations of DNA FSMs. Moreover, a comparison was made to clarify the advantages and disadvantages of each kind of presented DNA FSMs. A major goal for nanoscience, nanotechnology and supramolecular chemistry is to design synthetic molecular devices that are programmable and run autonomously. Programmable means that the behavior of the device can be modified without redesigning the whole structure. Autonomous means that it runs without externally mediated change to the work cycle. In this paper we present an odd Parity Checker Prototype Using DNAzyme FSM. Our paper makes use of a known design for a DNA nanorobotic device due to Reif and Sahu [1] for executing FSM computations using DNAzymes. The main contribution of our paper is a description of how to program that device to do a FSM computation known as odd parity checking. We describe in detail finite state automaton built on 10-23 DNAzyme, and give its procedure of design and computation.
The design procedure has two major phases: designing the language potential alphabet DNA strands, and depending on the first phase to design the DNAzyme possible transitions.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Davidson:2014:DBP, author = "Ruth Davidson and Seth Sullivant", title = "Distance-based phylogenetic methods around a polytomy", journal = j-TCBB, volume = "11", number = "2", pages = "325--335", month = mar, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2309592", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:18 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Distance-based phylogenetic algorithms attempt to solve the NP-hard least-squares phylogeny problem by mapping an arbitrary dissimilarity map representing biological data to a tree metric. The set of all dissimilarity maps is a Euclidean space properly containing the space of all tree metrics as a polyhedral fan. Outputs of distance-based tree reconstruction algorithms such as UPGMA and neighbor-joining are points in the maximal cones in the fan. Tree metrics with polytomies lie at the intersections of maximal cones. A phylogenetic algorithm divides the space of all dissimilarity maps into regions based upon which combinatorial tree is reconstructed by the algorithm. Comparison of phylogenetic methods can be done by comparing the geometry of these regions. We use polyhedral geometry to compare the local nature of the subdivisions induced by least-squares phylogeny, UPGMA, and neighbor-joining when the true tree has a single polytomy with exactly four neighbors. 
Our results suggest that in some circumstances, UPGMA and neighbor-joining poorly match least-squares phylogeny.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Patton:2014:HPI, author = "Kristopher L. Patton and David J. John and James L. Norris and Daniel R. Lewis and Gloria K. Muday", title = "Hierarchical probabilistic interaction modeling for multiple gene expression replicates", journal = j-TCBB, volume = "11", number = "2", pages = "336--346", month = mar, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2299804", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:18 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Microarray technology allows for the collection of multiple replicates of gene expression time course data for hundreds of genes at a handful of time points. Developing hypotheses about a gene transcriptional network, based on time course gene expression data is an important and very challenging problem. In many situations there are similarities which suggest a hierarchical structure between the replicates. This paper develops posterior probabilities for network features based on multiple hierarchical replications. Through Bayesian inference, in conjunction with the Metropolis--Hastings algorithm and model averaging, a hierarchical multiple replicate algorithm is applied to seven sets of simulated data and to a set of Arabidopsis thaliana gene expression data. The models of the simulated data suggest high posterior probabilities for pairs of genes which have at least moderate signal partial correlation. 
For the Arabidopsis model, many of the highest posterior probability edges agree with the literature.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{GaneshKumar:2014:HAB, author = "Pugalendhi GaneshKumar and Chellasamy Rani and Durairaj Devaraj and T. Aruldoss Albert Victoire", title = "Hybrid ant bee algorithm for fuzzy expert system based sample classification", journal = j-TCBB, volume = "11", number = "2", pages = "347--360", month = mar, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2307325", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:18 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Accuracy maximization and complexity minimization are the two main goals of a fuzzy expert system based microarray data classification. Our previous Genetic Swarm Algorithm (GSA) approach has improved the classification accuracy of the fuzzy expert system at the cost of their interpretability. The if-then rules produced by the GSA are lengthy and complex which is difficult for the physician to understand. To address this interpretability-accuracy tradeoff, the rule set is represented using integer numbers and the task of rule generation is treated as a combinatorial optimization task. Ant colony optimization (ACO) with local and global pheromone updations are applied to find out the fuzzy partition based on the gene expression values for generating simpler rule set. In order to address the formless and continuous expression values of a gene, this paper employs artificial bee colony (ABC) algorithm to evolve the points of membership function. Mutual Information is used for identification of informative genes. 
The performance of the proposed hybrid Ant Bee Algorithm (ABA) is evaluated using six gene expression data sets. From the simulation study, it is found that the proposed approach generated an accurate fuzzy system with highly interpretable and compact rules for all the data sets when compared with other approaches.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Tanaka:2014:IEE, author = "Shunji Tanaka", title = "Improved exact enumerative algorithms for the planted $ (l, d)$-motif search problem", journal = j-TCBB, volume = "11", number = "2", pages = "361--374", month = mar, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2306842", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:18 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In this paper efficient exact algorithms are proposed for the planted $ (l, d)$-motif search problem. This problem is to find all motifs of length $l$ that are planted in each input string with at most $d$ mismatches. The ``quorum'' version of this problem is also treated in this paper to find motifs planted not in all input strings but in at least $q$ input strings. The proposed algorithms are based on the previous algorithms called qPMSPruneI and qPMS7 that traverse a search tree starting from a $l$-length substring of an input string. To improve these previous algorithms, several techniques are introduced, which contribute to reducing the computation time for the traversal. 
In computational experiments, it will be shown that the proposed algorithms outperform the previous algorithms.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Siren:2014:IGP, author = "Jouni Sir{\'e}n and Niko V{\"a}lim{\"a}ki and Veli M{\"a}kinen", title = "Indexing graphs for path queries with applications in genome research", journal = j-TCBB, volume = "11", number = "2", pages = "375--388", month = mar, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.2297101", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:18 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We propose a generic approach to replace the canonical sequence representation of genomes with graph representations, and study several applications of such extensions. We extend the Burrows--Wheeler transform (BWT) of strings to acyclic directed labeled graphs, to support path queries as an extension to substring searching. We develop, apply, and tailor this technique to (a) read alignment on an extended BWT index of a graph representing pan-genome, i.e., reference genome and known variants of it; and (b) split-read alignment on an extended BWT index of a splicing graph. Other possible applications include probe/primer design, alignments to assembly graphs, and alignments to phylogenetic tree of partial-order graphs. We report several experiments on the feasibility and applicability of the approach. 
Especially on highly-polymorphic genome regions our pan-genome index is making a significant improvement in alignment accuracy.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Labarre:2014:MPL, author = "Anthony Labarre and Sicco Verwer", title = "Merging partially labelled trees: hardness and a declarative programming solution", journal = j-TCBB, volume = "11", number = "2", pages = "389--397", month = mar, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2307200", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:18 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Intraspecific studies often make use of haplotype networks instead of gene genealogies to represent the evolution of a set of genes. Cassens et al. [3] proposed one such network reconstruction method, based on the global maximum parsimony principle, which was later recast by the first author of the present work as the problem of finding a minimum common supergraph of a set of $t$ partially labelled trees. Although algorithms have been proposed for solving that problem on two graphs, the complexity of the general problem on trees remains unknown. In this paper, we show that the corresponding decision problem is NP-complete for $ t = 3$. 
We then propose a declarative programming approach to solving the problem to optimality in practice, as well as a heuristic approach, both based on the IDP system, and assess the performance of both methods on randomly generated data.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Li:2014:OSM, author = "Weiming Li and Bin Ma and Kaizhong Zhang", title = "Optimizing spaced $k$-mer neighbors for efficient filtration in protein similarity search", journal = j-TCBB, volume = "11", number = "2", pages = "398--406", month = mar, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2306831", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:18 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Large-scale comparison or similarity search of genomic DNA and protein sequence is of fundamental importance in modern molecular biology. To perform DNA and protein sequence similarity search efficiently, seeding (or filtration) method has been widely used where only sequences sharing a common pattern or ``seed'' are subject to detailed comparison. Therefore these methods trade search sensitivity with search speed. In this paper, we introduce a new seeding method, called spaced $k$-mer neighbors, which provides a better tradeoff between the sensitivity and speed in protein sequence similarity search. With the method of spaced $k$-mer neighbors, for each spaced $k$-mer, a set of spaced $k$-mers is selected as its neighbors. These pre-selected spaced $k$-mer neighbors are then used to detect hits between query sequence and database sequences. We propose an efficient heuristic algorithm for the spaced neighbor selection. 
Our computational experimental results demonstrate that the method of spaced $k$-mer neighbors can improve the overall tradeoff efficiency over existing seeding methods.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Tang:2014:PEP, author = "Xiwei Tang and Jianxin Wang and Jiancheng Zhong and Yi Pan", title = "Predicting essential proteins based on weighted degree centrality", journal = j-TCBB, volume = "11", number = "2", pages = "407--418", month = mar, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.2295318", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:18 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Essential proteins are vital for an organism's viability under a variety of conditions. There are many experimental and computational methods developed to identify essential proteins. Computational prediction of essential proteins based on the global protein-protein interaction (PPI) network is severely restricted because of the insufficiency of the PPI data, but fortunately the gene expression profiles help to make up the deficiency. In this work, Pearson correlation coefficient (PCC) is used to bridge the gap between PPI and gene expression data. Based on PCC and edge clustering coefficient (ECC), a new centrality measure, i.e., the weighted degree centrality (WDC), is developed to achieve the reliable prediction of essential proteins. WDC is employed to identify essential proteins in the yeast PPI and e-Coli networks in order to estimate its performance. For comparison, other prediction technologies are also performed to identify essential proteins. Some evaluation methods are used to analyze the results from various prediction approaches. 
The prediction results and comparative analyses are shown in the paper. Furthermore, the parameter $ \lambda $ in the method WDC will be analyzed in detail and an optimal $ \lambda $ value will be found. Based on the optimal $ \lambda $ value, the differentiation of WDC and another prediction method PeC is discussed. The analyses prove that WDC outperforms other methods including DC, BC, CC, SC, EC, IC, NC, and PeC. At the same time, the analyses also mean that it is an effective way to predict essential proteins by means of integrating different data sources.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Nasr:2014:SSS, author = "Kamal {Al Nasr} and Desh Ranjan and Mohammad Zubair and Lin Chen and Jing He", title = "Solving the secondary structure matching problem in cryo-{EM} de novo modeling using a constrained {$K$}-shortest path graph algorithm", journal = j-TCBB, volume = "11", number = "2", pages = "419--430", month = mar, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2302803", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:18 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Electron cryomicroscopy is becoming a major experimental technique in solving the structures of large molecular assemblies. More and more three-dimensional images have been obtained at the medium resolutions between 5 and 10{\AA}. At this resolution range, major $ \alpha $-helices can be detected as cylindrical sticks and $ \beta $-sheets can be detected as plain-like regions. A critical question in de novo modeling from cryo-EM images is to determine the match between the detected secondary structures from the image and those on the protein sequence.
We formulate this matching problem into a constrained graph problem and present an $ O(\Delta^2 N^2 2^N)$ algorithm to this NP-Hard problem. The algorithm incorporates the dynamic programming approach into a constrained $k$-shortest path algorithm. Our method, DP-TOSS, has been tested using $ \alpha $-proteins with maximum 33 helices and $ \alpha $--$ \beta $ proteins up to five helices and 12 $ \beta $-strands. The correct match was ranked within the top 35 for 19 of the 20 $ \alpha $-proteins and all nine $ \alpha $--$ \beta $ proteins tested. The results demonstrate that DP-TOSS improves accuracy, time and memory space in deriving the topologies of the secondary structure elements for proteins with a large number of secondary structures and a complex skeleton.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Moskon:2014:SAC, author = "Miha Moskon and Miha Mraz", title = "Systematic approach to computational design of gene regulatory networks with information processing capabilities", journal = j-TCBB, volume = "11", number = "2", pages = "431--440", month = mar, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.2295792", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:18 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We present several measures that can be used in de novo computational design of biological systems with information processing capabilities. Their main purpose is to objectively evaluate the behavior and identify the biological information processing structures with the best dynamical properties. They can be used to define constraints that allow one to simplify the design of more complex biological systems.
These measures can be applied to existent computational design approaches in synthetic biology, i.e., rational and automatic design approaches. We demonstrate their use on (a) the computational models of several basic information processing structures implemented with gene regulatory networks and (b) on a modular design of a synchronous toggle switch.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Tulpan:2014:TPP, author = "Dan Tulpan and Derek H. Smith and Roberto Montemanni", title = "Thermodynamic post-processing versus {GC}-content pre-processing for {DNA} codes satisfying the {Hamming} distance and reverse-complement constraints", journal = j-TCBB, volume = "11", number = "2", pages = "441--452", month = mar, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2299815", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:18 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Stochastic, meta-heuristic and linear construction algorithms for the design of DNA strands satisfying Hamming distance and reverse-complement constraints often use a GC-content constraint to pre-process the DNA strands. Since GC-content is a poor predictor of DNA strand hybridization strength the strands can be filtered by post-processing using thermodynamic calculations. An alternative approach is considered here, where the algorithms are modified to remove consideration of GC-content and rely on post-processing alone to obtain large sets of DNA strands with satisfactory melting temperatures. The two approaches (pre-processing GC-content and post-processing melting temperatures) are compared and are shown to be complementary when large DNA sets are desired. 
In particular, the second approach can give significant improvements when linear constructions are used.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Cai:2014:GEI, author = "Zhipeng Cai and Oliver Eulenstein and Cynthia Gibas", title = "Guest editors introduction to the special section on bioinformatics research and applications", journal = j-TCBB, volume = "11", number = "3", pages = "453--454", month = may, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2306114", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:22 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Huffner:2014:PBN, author = "Falk H{\"u}ffner and Christian Komusiewicz and Adrian Liebtrau and Rolf Niedermeier", title = "Partitioning biological networks into highly connected clusters with maximum edge coverage", journal = j-TCBB, volume = "11", number = "3", pages = "455--467", month = may, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.177", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:22 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A popular clustering algorithm for biological networks which was proposed by Hartuv and Shamir [5] identifies nonoverlapping highly connected components. 
We extend the approach taken by this algorithm by introducing the combinatorial optimization problem HIGHLY CONNECTED DELETION, which asks for removing as few edges as possible from a graph such that the resulting graph consists of highly connected components. We show that HIGHLY CONNECTED DELETION is NP-hard and provide a fixed-parameter algorithm and a kernelization. We propose exact and heuristic solution strategies, based on polynomial-time data reduction rules and integer linear programming with column generation. The data reduction typically identifies 75 percent of the edges that are deleted for an optimal solution; the column generation method can then optimally solve protein interaction networks with up to 6,000 vertices and 13,500 edges within five hours. Additionally, we present a new heuristic that finds more clusters than the method by Hartuv and Shamir.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Song:2014:MSS, author = "Xuebo Song and Lin Li and Pradip K. Srimani and Philip S. Yu and James Z. Wang", title = "Measure the semantic similarity of {GO} terms using aggregate information content", journal = j-TCBB, volume = "11", number = "3", pages = "468--476", month = may, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.176", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:22 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The rapid development of gene ontology (GO) and huge amount of biomedical data annotated by GO terms necessitate computation of semantic similarity of GO terms and, in turn, measurement of functional similarity of genes based on their annotations. In this paper we propose a novel and efficient method to measure the semantic similarity of GO terms. 
The proposed method addresses the limitations in existing GO term similarity measurement techniques; it computes the semantic content of a GO term by considering the information content of all of its ancestor terms in the graph. The aggregate information content (AIC) of all ancestor terms of a GO term implicitly reflects the GO term's location in the GO graph and also represents how human beings use this GO term and all its ancestor terms to annotate genes. We show that semantic similarity of GO terms obtained by our method closely matches the human perception. Extensive experimental studies show that this novel method also outperforms all existing methods in terms of the correlation with gene expression data. We have developed web services for measuring semantic similarity of GO terms and functional similarity of genes using the proposed AIC method and other popular methods. These web services are available at http://bioinformatics.clemson.edu/G-SESAME.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zheng:2014:EIL, author = "Yu Zheng and Louxin Zhang", title = "Effect of incomplete lineage sorting on tree-reconciliation-based inference of gene duplication", journal = j-TCBB, volume = "11", number = "3", pages = "477--485", month = may, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.2297913", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:22 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In the tree reconciliation approach to infer the duplication history of a gene family, the gene (family) tree is compared to the corresponding species tree. 
Incomplete lineage sorting (ILS) gives rise to stochastic variation in the topology of a gene tree and hence likely introduces false duplication events when a tree reconciliation method is used. We quantify the effect of ILS on gene duplication inference in a species tree in terms of the expected number of false duplication events inferred from reconciling a random gene tree, which occurs with a probability predicted in coalescent theory, and the species tree. We computationally examine the relationship between the effect of ILS on duplication inference in a species tree and its topological parameters. Our findings suggest that ILS may cause non-negligible bias on duplication inference, particularly on an asymmetric species tree. Hence, when gene duplication is inferred via tree reconciliation or any other approach that takes gene tree topology into account, the ILS-induced bias should be examined cautiously.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhao:2014:DPC, author = "Bihai Zhao and Jianxin Wang and Min Li and Fang-Xiang Wu and Yi Pan", title = "Detecting protein complexes based on uncertain graph model", journal = j-TCBB, volume = "11", number = "3", pages = "486--497", month = may, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.2297915", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:22 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Advanced biological technologies are producing large-scale protein-protein interaction (PPI) data at an ever increasing pace, which enable us to identify protein complexes from PPI networks. Pair-wise protein interactions can be modeled as a graph, where vertices represent proteins and edges represent PPIs. 
However most of current algorithms detect protein complexes based on deterministic graphs, whose edges are either present or absent. Neighboring information is neglected in these methods. Based on the uncertain graph model, we propose the concept of expected density to assess the density degree of a subgraph, the concept of relative degree to describe the relationship between a protein and a subgraph in a PPI network. We develop an algorithm called DCU (detecting complex based on uncertain graph model) to detect complexes from PPI networks. In our method, the expected density combined with the relative degree is used to determine whether a subgraph represents a complex with high cohesion and low coupling. We apply our method and the existing competing algorithms to two yeast PPI networks. Experimental results indicate that our method performs significantly better than the state-of-the-art methods and the proposed model can provide more insights for future study in PPI networks.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Marchiori:2014:GEP, author = "Elena Marchiori and Alioune Ngom and Raj Acharya", title = "Guest editorial: pattern recognition in bioinformatics", journal = j-TCBB, volume = "11", number = "3", pages = "498--499", month = may, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2315668", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:22 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Comin:2014:FEP, author = "Matteo Comin and Morris Antonello", title = "Fast entropic profiler: an information theoretic approach 
for the discovery of patterns in genomes", journal = j-TCBB, volume = "11", number = "3", pages = "500--509", month = may, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.2297924", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:22 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Information theory has been used for quite some time in the area of computational biology. In this paper we present a pattern discovery method, named Fast Entropic Profiler, that is based on a local entropy function that captures the importance of a region with respect to the whole genome. The local entropy function has been introduced by Vinga and Almeida in [29], here we discuss and improve the original formulation. We provide a linear time and linear space algorithm called Fast Entropic Profiler (FastEP), as opposed to the original quadratic implementation. Moreover we propose an alternative normalization that can be also efficiently implemented. We show that FastEP is suitable for large genomes and for the discovery of patterns with unbounded length. 
FastEP is available at http://www.dei.unipd.it/~ciompin/main/FastEP.html.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Dehzangi:2014:SBM, author = "Abdollah Dehzangi and Kuldip Paliwal and James Lyons and Alok Sharma and Abdul Sattar", title = "A segmentation-based method to extract structural and evolutionary features for protein fold recognition", journal = j-TCBB, volume = "11", number = "3", pages = "510--519", month = may, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.2296317", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:22 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Protein fold recognition (PFR) is considered as an important step towards the protein structure prediction problem. Despite all the efforts that have been made so far, finding an accurate and fast computational approach to solve the PFR still remains a challenging problem for bioinformatics and computational biology. In this study, we propose the concept of segmented-based feature extraction technique to provide local evolutionary information embedded in position specific scoring matrix (PSSM) and structural information embedded in the predicted secondary structure of proteins using SPINE-X. We also employ the concept of occurrence feature to extract global discriminatory information from PSSM and SPINE-X. By applying a support vector machine (SVM) to our extracted features, we enhance the protein fold prediction accuracy for 7.4 percent over the best results reported in the literature. 
We also report 73.8 percent prediction accuracy for a data set consisting of proteins with less than 25 percent sequence similarity rates and 80.7 percent prediction accuracy for a data set with proteins belonging to 110 folds with less than 40 percent sequence similarity rates. We also investigate the relation between the number of folds and the number of features being used and show that the number of features should be increased to get better protein fold prediction results when the number of folds is relatively large.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ochs:2014:OAT, author = "Michael F. Ochs and Jason E. Farrar and Michael Considine and Yingying Wei and Soheil Meshinchi and Robert J. Arceci", title = "Outlier analysis and top scoring pair for integrated data analysis and biomarker discovery", journal = j-TCBB, volume = "11", number = "3", pages = "520--532", month = may, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.153", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:22 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Pathway deregulation has been identified as a key driver of carcinogenesis, with proteins in signaling pathways serving as primary targets for drug development. Deregulation can be driven by a number of molecular events, including gene mutation, epigenetic changes in gene promoters, overexpression, and gene amplifications or deletions. We demonstrate a novel approach that identifies pathways of interest by integrating outlier analysis within and across molecular data types with gene set analysis. We use the results to seed the top-scoring pair algorithm to identify robust biomarkers associated with pathway deregulation. 
We demonstrate this methodology on pediatric acute myeloid leukemia (AML) data. We develop a biomarker in primary AML tumors, demonstrate robustness with an independent primary tumor data set, and show that the identified biomarkers also function well in relapsed pediatric AML tumors.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Jauhari:2014:MGE, author = "Shaurya Jauhari and S. A. M. Rizvi", title = "Mining gene expression data focusing cancer therapeutics: a digest", journal = j-TCBB, volume = "11", number = "3", pages = "533--547", month = may, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2312002", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:22 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "An understanding towards genetics and epigenetics is essential to cope up with the paradigm shift which is underway. Personalized medicine and gene therapy will confluence the days to come. This review highlights traditional approaches as well as current advancements in the analysis of the gene expression data from cancer perspective. Due to improvements in biometric instrumentation and automation, it has become easier to collect a lot of experimental data in molecular biology. Analysis of such data is extremely important as it leads to knowledge discovery that can be validated by experiments. Previously, the diagnosis of complex genetic diseases has conventionally been done based on the non-molecular characteristics like kind of tumor tissue, pathological characteristics, and clinical phase. The microarray data can be well accounted for high dimensional space and noise. Same were the reasons for ineffective and imprecise results. 
Several machine learning and data mining techniques are presently applied for identifying cancer using gene expression data. While differences in efficiency do exist, none of the well-established approaches is uniformly superior to others. The quality of algorithm is important, but is not in itself a guarantee of the quality of a specific data analysis.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wong:2014:ACP, author = "Andrew K. C. Wong and En-Shiun Annie Lee", title = "Aligning and clustering patterns to reveal the protein functionality of sequences", journal = j-TCBB, volume = "11", number = "3", pages = "548--560", month = may, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2306840", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:22 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Discovering sequence patterns with variations unveils significant functions of a protein family. Existing combinatorial methods of discovering patterns with variations are computationally expensive, and probabilistic methods require more elaborate probabilistic representation of the amino acid associations. To overcome these shortcomings, this paper presents a new computationally efficient method for representing patterns with variations in a compact representation called Aligned Pattern Cluster (AP Cluster). To tackle the runtime, our method discovers a shortened list of non-redundant statistically significant sequence associations based on our previous work. To address the representation of protein functional regions, our pattern alignment and clustering step, presented in this paper captures the conservations and variations of the aligned patterns. 
We further refine our solution to allow more coverage of sequences via extending the AP Clusters containing only statistically significant patterns to Weak and Conserved AP Clusters. When applied to the cytochrome c, the ubiquitin, and the triosephosphate isomerase protein families, our algorithm identifies the binding segments as well as the binding residues. When compared to other methods, ours discovers all binding sites in the AP Clusters with superior entropy and coverage. The identification of patterns with variations help biologists to avoid time-consuming simulations and experimentations. (Software available upon request).", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Mina:2014:IRL, author = "Marco Mina and Pietro Hiram Guzzi", title = "Improving the robustness of local network alignment: design and extensive assessment of a {Markov} clustering-based approach", journal = j-TCBB, volume = "11", number = "3", pages = "561--572", month = may, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2318707", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:22 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The analysis of protein behavior at the network level had been applied to elucidate the mechanisms of protein interaction that are similar in different species. Published network alignment algorithms proved to be able to recapitulate known conserved modules and protein complexes, and infer new conserved interactions confirmed by wet lab experiments. In the meantime, however, a plethora of continuously evolving protein-protein interaction (PPI) data sets have been developed, each featuring different levels of completeness and reliability. 
For instance, algorithms performance may vary significantly when changing the data set used in their assessment. Moreover, existing papers did not deeply investigate the robustness of alignment algorithms. For instance, some algorithms performances vary significantly when changing the data set used in their assessment. In this work, we design an extensive assessment of current algorithms discussing the robustness of the results on the basis of input networks. We also present AlignMCL, a local network alignment algorithm based on an improved model of alignment graph and Markov Clustering. AlignMCL performs better than other state-of-the-art local alignment algorithms over different updated data sets. In addition, AlignMCL features high levels of robustness, producing similar results regardless the selected data set.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Shen:2014:LKS, author = "Qingliang Shen and Hong Tian and Daoqi Tang and Wenbing Yao and Xiangdong Gao", title = "Ligand-{K*} sequence elimination: a novel algorithm for ensemble-based redesign of receptor-ligand binding", journal = j-TCBB, volume = "11", number = "3", pages = "573--578", month = may, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2302795", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:22 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "K* is rotamerically ensemble-based approach to compute the binding constant. However, its time-consuming feature limited its application. 
We present a novel algorithm that not only computes the partition function efficiently, but also avoids the exponential growth of execution time by iteratively pruning the sequence space until the sequence with highest affinity is identified.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Yu:2014:PFP, author = "Guoxian Yu and Huzefa Rangwala and Carlotta Domeniconi and Guoji Zhang and Zhiwen Yu", title = "Protein function prediction with incomplete annotations", journal = j-TCBB, volume = "11", number = "3", pages = "579--591", month = may, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.142", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:22 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Automated protein function prediction is one of the grand challenges in computational biology. Multi-label learning is widely used to predict functions of proteins. Most of multi-label learning methods make prediction for unlabeled proteins under the assumption that the labeled proteins are completely annotated, i.e., without any missing functions. However, in practice, we may have a subset of the ground-truth functions for a protein, and whether the protein has other functions is unknown. To predict protein functions with incomplete annotations, we propose a Protein Function Prediction method with Weak-label Learning (ProWL) and its variant ProWL-IF. Both ProWL and ProWL-IF can replenish the missing functions of proteins. In addition, ProWL-IF makes use of the knowledge that a protein cannot have certain functions, which can further boost the performance of protein function prediction. 
Our experimental results on protein-protein interaction networks and gene expression benchmarks validate the effectiveness of both ProWL and ProWL-IF.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Lopez-Caamal:2014:SQC, author = "Fernando L{\'o}pez-Caamal and Diego A. Oyarz{\'u}n and Richard H. Middleton and M{\'\i}riam R. Garc{\'\i}a", title = "Spatial quantification of cytosolic {Ca$^{2+}$} accumulation in nonexcitable cells: an analytical study", journal = j-TCBB, volume = "11", number = "3", pages = "592--603", month = may, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2316010", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:22 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Calcium ions act as messengers in a broad range of processes such as learning, apoptosis, and muscular movement. The transient profile and the temporal accumulation of calcium signals have been suggested as the two main characteristics in which calcium cues encode messages to be forwarded to downstream pathways. We address the analytical quantification of calcium temporal-accumulation in a long, thin section of a nonexcitable cell by solving a boundary value problem. In these expressions we note that the cytosolic Ca$^{2+}$ accumulation is independent of every intracellular calcium flux and depends on the Ca$^{2+}$ exchange across the membrane, cytosolic calcium diffusion, geometry of the cell, extracellular calcium perturbation, and initial concentrations. In particular, we analyse the time-integrated response of cytosolic calcium due to (i) a localised initial concentration of cytosolic calcium and (ii) transient extracellular perturbation of calcium. 
In these scenarios, we conclude that (i) the range of calcium progression is confined to the vicinity of the initial concentration, thereby creating calcium microdomains; and (ii) we observe a low-pass filtering effect in the response driven by extracellular Ca$^{2+}$ perturbations. Additionally, we note that our methodology can be used to analyse a broader range of stimuli and scenarios.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Guan:2014:BDC, author = "Benjamin X. Guan and Bir Bhanu and Prue Talbot and Sabrina Lin", title = "Bio-driven cell region detection in human embryonic stem cell assay", journal = j-TCBB, volume = "11", number = "3", pages = "604--611", month = may, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2306836", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:22 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This paper proposes a bio-driven algorithm that detects cell regions automatically in the human embryonic stem cell (hESC) images obtained using a phase contrast microscope. The algorithm uses both statistical intensity distributions of foreground/hESCs and background/substrate as well as cell property for cell region detection. The intensity distributions of foreground/hESCs and background/substrate are modeled as a mixture of two Gaussians. The cell property is translated into local spatial information. The algorithm is optimized by parameters of the modeled distributions and cell regions evolve with the local cell property. The paper validates the method with various videos acquired using different microscope objectives. 
In comparison with the state-of-the-art methods, the proposed method is able to detect the entire cell region instead of fragmented cell regions. It also yields high marks on measures such as Jaccard similarity, Dice coefficient, sensitivity and specificity. Automated detection by the proposed method has the potential to enable fast quantifiable analysis of hESCs using large data sets which are needed to understand dynamic cell behaviors.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Xu:2014:RAE, author = "Ying Xu", title = "Reviewer appreciation editorial", journal = j-TCBB, volume = "11", number = "4", pages = "613--613", month = jul, year = "2014", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:27 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhou:2014:GEA, author = "Shuigeng Zhou and Yi-Ping Phoebe Chen", title = "Guest editorial for the {12th Asia Pacific Bioinformatics Conference}", journal = j-TCBB, volume = "11", number = "4", pages = "614--615", month = jul, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2327487", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:27 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Xu:2014:FIN, author = "Bin Xu and Jihong Guan", title = "From function to interaction: a new paradigm for 
accurately predicting protein complexes based on protein-to-protein interaction networks", journal = j-TCBB, volume = "11", number = "4", pages = "616--627", month = jul, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2306825", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:27 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Identification of protein complexes is critical to understand complex formation and protein functions. Recent advances in high-throughput experiments have provided large data sets of protein-protein interactions (PPIs). Many approaches, based on the assumption that complexes are dense subgraphs of PPI networks (PINs in short), have been proposed to predict complexes using graph clustering methods. In this paper, we introduce a novel from-function-to-interaction paradigm for protein complex detection. As proteins perform biological functions by forming complexes, we first cluster proteins using biology process (BP) annotations from gene ontology (GO). Then, we map the resulting protein clusters onto a PPI network (PIN in short), extract connected subgraphs consisting of clustered proteins from the PPI network and expand each connected subgraph with protein nodes that have rich links to the proteins in the subgraph. Such expanded subgraphs are taken as predicted complexes. We apply the proposed method (called CPredictor) to two PPI data sets of S. cerevisiae for predicting protein complexes. Experimental results show that CPredictor outperforms the existing methods. 
The outstanding precision of CPredictor proves that the from-function-to-interaction paradigm provides a new and effective way to computational detection of protein complexes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Comin:2014:BFR, author = "Matteo Comin and Davide Verzotto", title = "Beyond fixed-resolution alignment-free measures for mammalian enhancers sequence comparison", journal = j-TCBB, volume = "11", number = "4", pages = "628--637", month = jul, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2306830", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:27 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The cell-type diversity is to a large degree driven by transcription regulation, i.e., enhancers. It has been recently shown that in high-level eukaryotes enhancers rarely work alone, instead they collaborate by forming clusters of cis-regulatory modules (CRMs). Even if the binding of transcription factors is sequence-specific, the identification of functionally similar enhancers is very difficult. A similarity measure to detect related regulatory sequences is crucial to understand functional correlation between two enhancers. This will allow large-scale analyses, clustering and genome-wide classifications. In this paper we present Under2, a parameter-free alignment-free statistic based on variable-length words. As opposed to traditional alignment-free methods, which are based on fixed-length patterns or, in other words, tied to a fixed resolution, our statistic is built upon variable-length words, and thus multiple resolutions are allowed. This will capture the great variability of lengths of CRMs. 
We evaluate several alignment-free statistics on simulated data and real ChIP-seq sequences. The new statistic is highly successful in discriminating functionally related enhancers and, in almost all experiments, it outperforms fixed-resolution methods. Finally, experiments on mouse enhancers show that Under2 can separate enhancers active in different tissues. Availability: http://www.dei.unipd.it/~ciompin/main/UnderIICRMS.html", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Gan:2014:NWB, author = "Yanglan Gan and Guobing Zou and Jihong Guan and Guangwei Xu", title = "A novel wavelet-based approach for predicting nucleosome positions using {DNA} structural information", journal = j-TCBB, volume = "11", number = "4", pages = "638--647", month = jul, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2306837", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:27 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Nucleosomes are basic elements of chromatin structure. The positioning of nucleosomes along a genome is very important to dictate eukaryotic DNA compaction and access. Current computational methods have focused on the analysis of nucleosome occupancy and the positioning of well-positioned nucleosomes. However, fuzzy nucleosomes require more complex configurations and are more difficult to predict their positions. We analyzed the positioning of well-positioned and fuzzy nucleosomes from a novel structural perspective, and proposed WaveNuc, a computational approach for inferring their positions based on continuous wavelet transformation. The comparative analysis demonstrates that these two kinds of nucleosomes exhibit different propeller twist structural characteristics. 
Well-positioned nucleosomes tend to locate at sharp peaks of the propeller twist profile, whereas fuzzy nucleosomes correspond to broader peaks. The sharpness of these peaks shows that the propeller twist profile may contain nucleosome positioning information. Exploiting this knowledge, we applied WaveNuc to detect the two different kinds of peaks of the propeller twist profile along the genome. We compared the performance of our method with existing methods on real data sets. The results show that the proposed method can accurately resolve complex configurations of fuzzy nucleosomes, which leads to better performance of nucleosome positioning prediction on the whole genome.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Lo:2014:SCR, author = "Christine Lo and Boyko Kakaradov and Daniel Lokshtanov and Christina Boucher", title = "{SeeSite}: characterizing relationships between splice junctions and splicing enhancers", journal = j-TCBB, volume = "11", number = "4", pages = "648--656", month = jul, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2304294", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:27 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "RNA splicing is a cellular process driven by the interaction between numerous regulatory sequences and binding sites, however, such interactions have been primarily explored by laboratory methods since computational tools largely ignore the relationship between different splicing elements. Current computational methods identify either splice sites or other regulatory sequences, such as enhancers and silencers. We present a novel approach for characterizing co-occurring relationships between splice site motifs and splicing enhancers. 
Our approach relies on an efficient algorithm for approximately solving Consensus Sequence with Outliers, an NP-complete string clustering problem. In particular, we give an algorithm for this problem that outputs near-optimal solutions in polynomial time. To our knowledge, this is the first formulation and computational attempt for detecting co-occurring sequence elements in RNA sequence data. Further, we demonstrate that SeeSite is capable of showing that certain ESEs are preferentially associated with weaker splice sites, and that there exists a co-occurrence relationship with splice site motifs.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Yu:2014:IEL, author = "Hualong Yu and Jun Ni", title = "An improved ensemble learning method for classifying high-dimensional and imbalanced biomedicine data", journal = j-TCBB, volume = "11", number = "4", pages = "657--666", month = jul, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2306838", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:27 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Training classifiers on skewed data can be technically challenging tasks, especially if the data is high-dimensional simultaneously, the tasks can become more difficult. In biomedicine field, skewed data type often appears. In this study, we try to deal with this problem by combining asymmetric bagging ensemble classifier (asBagging) that has been presented in previous work and an improved random subspace (RS) generation strategy that is called feature subspace (FSS). Specifically, FSS is a novel method to promote the balance level between accuracy and diversity of base classifiers in asBagging. 
In view of the strong generalization capability of support vector machine (SVM), we adopt it to be base classifier. Extensive experiments on four benchmark biomedicine data sets indicate that the proposed ensemble learning method outperforms many baseline approaches in terms of Accuracy, F-measure, G-mean and AUC evaluation criterions, thus it can be regarded as an effective and efficient tool to deal with high-dimensional and imbalanced biomedical data.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Hu:2014:PRA, author = "Fei Hu and Jun Zhou and Lingxi Zhou and Jijun Tang", title = "Probabilistic reconstruction of ancestral gene orders with insertions and deletions", journal = j-TCBB, volume = "11", number = "4", pages = "667--672", month = jul, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2309602", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:27 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Changes of gene orderings have been extensively used as a signal to reconstruct phylogenies and ancestral genomes. Inferring the gene order of an extinct species has a wide range of applications, including the potential to reveal more detailed evolutionary histories, to determine gene content and ordering, and to understand the consequences of structural changes for organismal function and species divergence. In this study, we propose a new adjacency-based method, PMAG+, to infer ancestral genomes under a more general model of gene evolution involving gene insertions and deletions (indels), in addition to gene rearrangements. PMAG+ improves on our previous method PMAG by developing a new approach to infer ancestral gene contents and reducing the adjacency assembly problem to an instance of TSP. 
We designed a series of experiments to extensively validate PMAG+ and compared the results with the most recent and comparable method GapAdj. According to the results, ancestral gene contents predicted by PMAG+ coincides highly with the actual contents with error rates less than 1 percent. Under various degrees of indels, PMAG+ consistently achieves more accurate prediction of ancestral gene orders and at the same time, produces contigs very close to the actual chromosomes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Thomas:2014:MLE, author = "Minta Thomas and Anneleen Daemen and Bart {De Moor}", title = "Maximum likelihood estimation of {GEVD}: applications in bioinformatics", journal = j-TCBB, volume = "11", number = "4", pages = "673--680", month = jul, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2304292", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:27 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We propose a method, maximum likelihood estimation of generalized eigenvalue decomposition (MLGEVD) that employs a well known technique relying on the generalization of singular value decomposition (SVD). The main aim of the work is to show the tight equivalence between MLGEVD and generalized ridge regression. This relationship reveals an important mathematical property of GEVD in which the second argument act as prior information in the model. Thus we show that MLGEVD allows the incorporation of external knowledge about the quantities of interest into the estimation problem. 
We illustrate the importance of prior knowledge in clinical decision making/identifying differentially expressed genes with case studies for which microarray data sets with corresponding clinical/literature information are available. On all of these three case studies, MLGEVD outperformed GEVD on prediction in terms of test area under the ROC curve (test AUC). MLGEVD results in significantly improved diagnosis, prognosis and prediction of therapy response.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ichikawa:2014:SPH, author = "Kazuki Ichikawa and Shinichi Morishita", title = "A simple but powerful heuristic method for accelerating $k$-means clustering of large-scale data in life science", journal = j-TCBB, volume = "11", number = "4", pages = "681--692", month = jul, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2306200", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:27 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "$K$-means clustering has been widely used to gain insight into biological systems from large-scale life science data. To quantify the similarities among biological data sets, Pearson correlation distance and standardized Euclidean distance are used most frequently; however, optimization methods have been largely unexplored. These two distance measurements are equivalent in the sense that they yield the same $k$-means clustering result for identical sets of $k$ initial centroids. Thus, an efficient algorithm used for one is applicable to the other. Several optimization methods are available for the Euclidean distance and can be used for processing the standardized Euclidean distance; however, they are not customized for this context. 
We instead approached the problem by studying the properties of the Pearson correlation distance, and we invented a simple but powerful heuristic method for markedly pruning unnecessary computation while retaining the final solution. Tests using real biological data sets with 50--60K vectors of dimensions 10--2001 ($ \approx 400$MB in size) demonstrated marked reduction in computation time for $ k = 10$--$ 500$ in comparison with other state-of-the-art pruning methods such as Elkan's and Hamerly's algorithms. The BoostKCP software is available at http://mlab.cb.k.u-tokyo.ac.jp/~ichikawa/boostKCP/.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Gao:2014:HTZ, author = "Yuan Gao and Rosa H. M. Chan and Tommy W. S. Chow and Liyun Zhang and Sylvia Bonilla and Chi-Pui Pang and Mingzhi Zhang and Yuk Fai Leung", title = "A high-throughput zebrafish screening method for visual mutants by light-induced locomotor response", journal = j-TCBB, volume = "11", number = "4", pages = "693--701", month = jul, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2306829", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:27 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Normal and visually-impaired zebrafish larvae have differentiable light-induced locomotor response (LLR), which is composed of visual and non-visual components. It is recently demonstrated that differences in the acute phase of the LLR, also known as the visual motor response (VMR), can be utilized to evaluate new eye drugs. However, most of the previous studies focused on the average LLR activity of a particular genotype, which left information that could address differences in individual zebrafish development unattended. 
In this study, machine learning techniques were employed to distinguish not only zebrafish larvae of different genotypes, but also different batches, based on their response to light stimuli. This approach allows us to perform efficient high-throughput zebrafish screening with relatively simple preparations. Following the general machine learning framework, some discriminative features were first extracted from the behavioral data. Both unsupervised and supervised learning algorithms were implemented for the classification of zebrafish of different genotypes and batches. The accuracy of the classification in genotype was over 80 percent and could achieve up to 95 percent in some cases. The results obtained shed light on the potential of using machine learning techniques for analyzing behavioral data of zebrafish, which may enhance the reliability of high-throughput drug screening.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Chuang:2014:NSG, author = "Chia-Hua Chuang and Chun-Liang Lin", title = "A novel synthesizing genetic logic circuit: frequency multiplier", journal = j-TCBB, volume = "11", number = "4", pages = "702--713", month = jul, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2316814", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:27 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This paper presents a novel synthesizing genetic logic circuit design based on an existing synthetic genetic oscillator, which provides a function of frequency multiplier to synthesize a clock signal whose frequency is a multiple of that of the genetic oscillator. 
In the renowned literature, the synthetic genetic oscillator, known as a repressilator, has been successfully built in Escherichia coli to generate a periodic oscillating phenomenon through three repressive genes repress each other in a chain. On the basis of this fact, our proposed genetic frequency multiplier circuit utilizes genetic Buffers in series with a waveform-shaping circuit to reshape the genetic oscillation signal into a crisp logic clock signal. By regulating different threshold levels in the Buffer, the time length of logic high/low levels in a fundamental sinusoidal wave can be engineered to pulse-width-modulated (PWM) signals with various duty cycles. Integrating some of genetic logic XOR gates and PWM signals from the output of the Buffers, a genetic frequency multiplier circuit can be created and the clock signal with the integer-fold of frequency of the genetic oscillator is generated. The synthesized signal can be used in triggering the downstream digital genetic logic circuits. Simulation results show the applicability of the proposed idea.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Su:2014:AMD, author = "Hai Su and Fuyong Xing and Jonah D. Lee and Charlotte A. Peterson and Lin Yang", title = "Automatic myonuclear detection in isolated single muscle fibers using robust ellipse fitting and sparse representation", journal = j-TCBB, volume = "11", number = "4", pages = "714--726", month = jul, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2013.151", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:27 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Accurate and robust detection of myonuclei in isolated single muscle fibers is required to calculate myonuclear domain size. 
However, this task is challenging because: (1) shape and size variations of the nuclei, (2) overlapping nuclear clumps, and (3) multiple $z$-stack images with out-of-focus regions. In this paper, we have proposed a novel automatic detection algorithm to robustly quantify myonuclei in isolated single skeletal muscle fibers. The original $z$-stack images are first converted into one all-in-focus image using multi-focus image fusion. A sufficient number of ellipse fitting hypotheses are then generated from the myonuclei contour segments using heteroscedastic errors-in-variables (HEIV) regression. A set of representative training samples and a set of discriminative features are selected by a two-stage sparse model. The selected samples with representative features are utilized to train a classifier to select the best candidates. A modified inner geodesic distance based mean-shift clustering algorithm is used to produce the final nuclei detection results. The proposed method was extensively tested using 42 sets of $z$-stack images containing over 1,500 myonuclei. 
The method demonstrates excellent results that are better than current state-of-the-art approaches.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Yu:2014:DSB, author = "Zhiwen Yu and Hongsheng Chen and Jane You and Hau-San Wong and Jiming Liu and Le Li and Guoqiang Han", title = "Double selection based semi-supervised clustering ensemble for tumor clustering from gene expression profiles", journal = j-TCBB, volume = "11", number = "4", pages = "727--740", month = jul, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2315996", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:27 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Tumor clustering is one of the important techniques for tumor discovery from cancer gene expression profiles, which is useful for the diagnosis and treatment of cancer. While different algorithms have been proposed for tumor clustering, few make use of the expert's knowledge to better the performance of tumor discovery. In this paper, we first view the expert's knowledge as constraints in the process of clustering, and propose a feature selection based semi-supervised cluster ensemble framework (FS-SSCE) for tumor clustering from bio-molecular data. Compared with traditional tumor clustering approaches, the proposed framework FS-SSCE is featured by two properties: (1) The adoption of feature selection techniques to dispel the effect of noisy genes. (2) The employment of the binate constraint based K-means algorithm to take into account the effect of experts' knowledge. 
Then, a double selection based semi-supervised cluster ensemble framework (DS-SSCE) which not only applies the feature selection technique to perform gene selection on the gene dimension, but also selects an optimal subset of representative clustering solutions in the ensemble and improve the performance of tumor clustering using the normalized cut algorithm. DS-SSCE also introduces a confidence factor into the process of constructing the consensus matrix by considering the prior knowledge of the data set. Finally, we design a modified double selection based semi-supervised cluster ensemble framework (MDS-SSCE) which adopts multiple clustering solution selection strategies and an aggregated solution selection function to choose an optimal subset of clustering solutions. The results in the experiments on cancer gene expression profiles show that (i) FS-SSCE, DS-SSCE and MDS-SSCE are suitable for performing tumor clustering from bio-molecular data. (ii) MDS-SSCE outperforms a number of state-of-the-art tumor clustering approaches on most of the data sets.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Fa:2014:NRG, author = "Rui Fa and Asoke K. Nandi", title = "Noise resistant generalized parametric validity index of clustering for gene expression data", journal = j-TCBB, volume = "11", number = "4", pages = "741--752", month = jul, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2312006", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:27 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Validity indices have been investigated for decades. 
However, since there is no study of noise-resistance performance of these indices in the literature, there is no guideline for determining the best clustering in noisy data sets, especially microarray data sets. In this paper, we propose a generalized parametric validity (GPV) index which employs two tunable parameters $ \alpha $ and $ \beta $ to control the proportions of objects being considered to calculate the dissimilarities. The greatest advantage of the proposed GPV index is its noise-resistance ability, which results from the flexibility of tuning the parameters. Several rules are set to guide the selection of parameter values. To illustrate the noise-resistance performance of the proposed index, we evaluate the GPV index for assessing five clustering algorithms in two gene expression data simulation models with different noise levels and compare the ability of determining the number of clusters with eight existing indices. We also test the GPV in three groups of real gene expression data sets. 
The experimental results suggest that the proposed GPV index has superior noise-resistance ability and provides fairly accurate judgements.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Garcia-Jimenez:2014:PPR, author = "Beatriz Garc{\'\i}a-Jim{\'e}nez and Tirso Pons and Araceli Sanchis and Alfonso Valencia", title = "Predicting protein relationships to human pathways through a relational learning approach based on simple sequence features", journal = j-TCBB, volume = "11", number = "4", pages = "753--765", month = jul, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2318730", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:27 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Biological pathways are important elements of systems biology and in the past decade, an increasing number of pathway databases have been set up to document the growing understanding of complex cellular processes. Although more genome-sequence data are becoming available, a large fraction of it remains functionally uncharacterized. Thus, it is important to be able to predict the mapping of poorly annotated proteins to original pathway models. Results: We have developed a Relational Learning-based Extension (RLE) system to investigate pathway membership through a function prediction approach that mainly relies on combinations of simple properties attributed to each protein. RLE searches for proteins with molecular similarities to specific pathway components. Using RLE, we associated 383 uncharacterized proteins to 28 pre-defined human Reactome pathways, demonstrating relative confidence after proper evaluation. 
Indeed, in specific cases manual inspection of the database annotations and the related literature supported the proposed classifications. Examples of possible additional components of the Electron transport system, Telomere maintenance and Integrin cell surface interactions pathways are discussed in detail. Availability: All the human predicted proteins in the 2009 and 2012 releases 30 and 40 of Reactome are available at http://rle.bioinfo.cnio.es.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Kong:2014:BSD, author = "Ao Kong and Chinmaya Gupta and Mauro Ferrari and Marco Agostini and Chiara Bedin and Ali Bouamrani and Ennio Tasciotti and Robert Azencott", title = "Biomarker signature discovery from mass spectrometry data", journal = j-TCBB, volume = "11", number = "4", pages = "766--772", month = jul, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2318718", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 6 16:13:27 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Mass spectrometry based high throughput proteomics are used for protein analysis and clinical diagnosis. Many machine learning methods have been used to construct classifiers based on mass spectrometry data, for discrimination between cancer stages. However, the classifiers generated by machine learning such as SVM techniques typically lack biological interpretability. We present an innovative technique for automated discovery of signatures optimized to characterize various cancer stages. We validate our signature discovery algorithm on one new colorectal cancer MALDI-TOF data set, and two well-known ovarian cancer SELDI-TOF data sets. 
In all of these cases, our signature based classifiers performed either better or at least as well as four benchmark machine learning algorithms including SVM and KNN. Moreover, our optimized signatures automatically select smaller sets of key biomarkers than the black-boxes generated by machine learning, and are much easier to interpret.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Pandey:2014:GES, author = "Gaurav Pandey and Huzefa Rangwala", title = "Guest editorial for special section on {BIOKDD2013}", journal = j-TCBB, volume = "11", number = "5", pages = "773--774", month = sep, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2348731", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:35 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Fakhraei:2014:NBD, author = "Shobeir Fakhraei and Bert Huang and Louiqa Raschid and Lise Getoor", title = "Network-based drug-target interaction prediction with probabilistic soft logic", journal = j-TCBB, volume = "11", number = "5", pages = "775--787", month = sep, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2325031", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:35 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Drug-target interaction studies are important because they can predict drugs' unexpected therapeutic or adverse side effects. In silico predictions of potential interactions are valuable and can focus effort on in vitro experiments. 
We propose a prediction framework that represents the problem using a bipartite graph of drug-target interactions augmented with drug-drug and target-target similarity measures and makes predictions using probabilistic soft logic (PSL). Using probabilistic rules in PSL, we predict interactions with models based on triad and tetrad structures. We apply (blocking) techniques that make link prediction in PSL more efficient for drug-target interaction prediction. We then perform extensive experimental studies to highlight different aspects of the model and the domain, first comparing the models with different structures and then measuring the effect of the proposed blocking on the prediction performance and efficiency. We demonstrate the importance of rule weight learning in the proposed PSL model and then show that PSL can effectively make use of a variety of similarity measures. We perform an experiment to validate the importance of collective inference and using multiple similarity measures for accurate predictions in contrast to non-collective and single similarity assumptions. Finally, we illustrate that our PSL model achieves state-of-the-art performance with simple, interpretable rules and evaluate our novel predictions using online data sets.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Seetan:2014:RRH, author = "Raed I. Seetan and Anne M. Denton and Omar Al-Azzam and Ajay Kumar and M. Javed Iqbal and Shahryar F. 
Kianian", title = "Reliable radiation hybrid maps: an efficient scalable clustering-based approach", journal = j-TCBB, volume = "11", number = "5", pages = "788--800", month = sep, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2329310", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:35 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The process of mapping markers from radiation hybrid mapping (RHM) experiments is equivalent to the traveling salesman problem and, thereby, has combinatorial complexity. As an additional problem, experiments typically result in some unreliable markers that reduce the overall quality of the map. We propose a clustering approach for addressing both problems efficiently by eliminating unreliable markers without the need for mapping the complete set of markers. Traditional approaches for eliminating markers use resampling of the full data set, which has an even higher computational complexity than the original mapping problem. In contrast, the proposed approach uses a divide-and-conquer strategy to construct framework maps based on clusters that exclude unreliable markers. Clusters are ordered using parallel processing and are then combined to form the complete map. We present three algorithms that explore the trade-off between the number of markers included in the map and placement accuracy. Using an RHM data set of the human genome, we compare the framework maps from our proposed approaches with published physical maps and with the results of using the Carthagene tool. 
Overall, our approaches have a very low computational complexity and produce solid framework maps with good chromosome coverage and high agreement with the physical map marker order.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Goncalves:2014:LEH, author = "Joana P. Gon{\c{c}}alves and Sara C. Madeira", title = "{LateBiclustering}: efficient heuristic algorithm for time-lagged bicluster identification", journal = j-TCBB, volume = "11", number = "5", pages = "801--813", month = sep, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2312007", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:35 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Identifying patterns in temporal data is key to uncover meaningful relationships in diverse domains, from stock trading to social interactions. Also of great interest are clinical and biological applications, namely monitoring patient response to treatment or characterizing activity at the molecular level. In biology, researchers seek to gain insight into gene functions and dynamics of biological processes, as well as potential perturbations of these leading to disease, through the study of patterns emerging from gene expression time series. Clustering can group genes exhibiting similar expression profiles, but focuses on global patterns denoting rather broad, unspecific responses. Biclustering reveals local patterns, which more naturally capture the intricate collaboration between biological players, particularly under a temporal setting. Despite the general biclustering formulation being NP-hard, considering specific properties of time series has led to efficient solutions for the discovery of temporally aligned patterns. 
Notably, the identification of biclusters with time-lagged patterns, suggestive of transcriptional cascades, remains a challenge due to the combinatorial explosion of delayed occurrences. Herein, we propose LateBiclustering, a sensible heuristic algorithm enabling a polynomial rather than exponential time solution for the problem. We show that it identifies meaningful time-lagged biclusters relevant to the response of Saccharomyces cerevisiae to heat stress.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhou:2014:DSC, author = "Cheng Zhou and Pieter Meysman and Boris Cule and Kris Laukens and Bart Goethals", title = "Discovery of spatially cohesive itemsets in three-dimensional protein structures", journal = j-TCBB, volume = "11", number = "5", pages = "814--825", month = sep, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2311795", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:35 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In this paper we present a cohesive structural itemset miner aiming to discover interesting patterns in a set of data objects within a multidimensional spatial structure by combining the cohesion and the support of the pattern. We propose two ways to build the itemset miner, VertexOne and VertexAll, in an attempt to find a balance between accuracy and run-times. The experiments show that VertexOne performs better, and finds almost the same itemsets as VertexAll in a much shorter time. The usefulness of the method is demonstrated by applying it to find interesting patterns of amino acids in spatial proximity within a set of proteins based on their atomic coordinates in the protein molecular structure. 
Several patterns found by the cohesive structural itemset miner contain amino acids that frequently co-occur in the spatial structure, even if they are distant in the primary protein sequence and only brought together by protein folding. Further various indications were found that some of the discovered patterns seem to represent common underlying support structures within the proteins.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zeller:2014:GAP, author = "Michael Zeller and Christophe N. Magnan and Vishal R. Patel and Paul Rigor and Leonard Sender and Pierre Baldi", title = "A genomic analysis pipeline and its application to pediatric cancers", journal = j-TCBB, volume = "11", number = "5", pages = "826--839", month = sep, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2330616", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:35 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We present a cancer genomic analysis pipeline which takes as input sequencing reads for both germline and tumor genomes and outputs filtered lists of all genetic mutations in the form of short ranked list of the most affected genes in the tumor, using either the Complete Genomics or Illumina platforms. A novel reporting and ranking system has been developed that makes use of publicly available datasets and literature specific to each patient, including new methods for using publicly available expression data in the absence of proper control data. Previously implicated small and large variations (including gene fusions) are reported in addition to probable driver mutations. 
Relationships between cancer and the sequenced tumor genome are highlighted using a network-based approach that integrates known and predicted protein-protein, protein-TF, and protein-drug interaction data. By using an integrative approach, effects of genetic variations on gene expression are used to provide further evidence of driver mutations. This pipeline has been developed with the aim to be used in assisting in the analysis of pediatric tumors, as an unbiased and automated method for interpreting sequencing results along with identifying potentially therapeutic drugs and their targets. We present results that agree with previous literature and highlight specific findings in a few patients.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Chen:2014:ANG, author = "Peng Chen and Chao Wang and Xi Li and Xuehai Zhou", title = "Accelerating the next generation long read mapping with the {FPGA}-based system", journal = j-TCBB, volume = "11", number = "5", pages = "840--852", month = sep, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2326876", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:35 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "To compare the newly determined sequences against the subject sequences stored in the databases is a critical job in the bioinformatics. Fortunately, recent survey reports that the state-of-the-art aligners are already fast enough to handle the ultra amount of short sequence reads in the reasonable time. However, for aligning the long sequence reads ({$>$400} bp) generated by the next generation sequencing (NGS) technology, it is still quite inefficient with present aligners. 
Furthermore, the challenge becomes more and more serious as the lengths and the amounts of the sequence reads are both keeping increasing with the improvement of the sequencing technology. Thus, it is extremely urgent for the researchers to enhance the performance of the long read alignment. In this paper, we propose a novel FPGA-based system to improve the efficiency of the long read mapping. Compared to the state-of-the-art long read aligner BWA-SW, our accelerating platform could achieve a high performance with almost the same sensitivity. Experiments demonstrate that, for reads with lengths ranging from 512 up to 4,096 base pairs, the described system obtains a 10$ \times $--48$ \times $ speedup for the bottleneck of the software. As to the whole mapping procedure, the FPGA-based platform could achieve a 1.8$ \times $--3.3$ \times $ speedup versus the BWA-SW aligner, reducing the alignment cycles from weeks to days.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Meira:2014:AMA, author = "Luis A. A. Meira and Vin{\'\i}cius R. M{\'a}ximo and {\'A}lvaro L. Fazenda and Arlindo F. {Da Concei{\c{c}}{\~a}o}", title = "{Acc-Motif}: accelerated network motif detection", journal = j-TCBB, volume = "11", number = "5", pages = "853--862", month = sep, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2321150", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:35 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Network motif algorithms have been a topic of research mainly after the 2002-seminal paper from Milo et al. [1], which provided motifs as a way to uncover the basic building blocks of most networks. Motifs have been mainly applied in Bioinformatics, regarding gene regulation networks.
Motif detection is based on induced subgraph counting. This paper proposes an algorithm to count subgraphs of size $ k + 2 $ based on the set of induced subgraphs of size $k$. The general technique was applied to detect 3, 4 and 5-sized motifs in directed graphs. Such algorithms have time complexity $ O(a(G) m) $, $ O(m^2) $ and $ O(n m^2) $, respectively, where $ a(G) $ is the arboricity of $ G(V, E) $. The computational experiments in public data sets show that the proposed technique was one order of magnitude faster than Kavosh and FANMOD. When compared to NetMODE, acc-Motif had a slightly improved performance.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Messaoudi:2014:BSS, author = "Imen Messaoudi and Afef Elloumi-Oueslati and Zied Lachiri", title = "Building specific signals from frequency chaos game and revealing periodicities using a smoothed {Fourier} analysis", journal = j-TCBB, volume = "11", number = "5", pages = "863--877", month = sep, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2315991", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:35 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Investigating the roles and functions of DNA within genomes is becoming a primary focus of genomic research. Thus, the research works are moving towards cooperation between different scientific disciplines which aims at facilitating the interpretation of genetic information. In order to characterize the DNA of living organisms, signal processing tools appear to be very suitable for such study. However, a DNA sequence must be converted into a numerical sequence before processing; which defines the concept of DNA coding.
In line with this, we propose a new one dimensional model based on the chaos game representation theory called Frequency Chaos Game Signal: FCGS. Then, we perform a Smoothed Fourier Transform to enhance hidden periodicities in the C. elegans DNA sequences. Through this study, we demonstrate the performance of our coding approach in highlighting characteristic periodicities. Indeed, several periodicities are shown to be involved in the 1D spectra and the 2D spectrograms of FCGSs. To investigate further about the contribution of our method in the enhancement of characteristic spectral attributes, a comparison with a range of binary indicators is established.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Krotzky:2014:EGB, author = "Timo Krotzky and Thomas Fober and Eyke H{\"u}llermeier and Gerhard Klebe", title = "Extended graph-based models for enhanced similarity search in {Cavbase}", journal = j-TCBB, volume = "11", number = "5", pages = "878--890", month = sep, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2325020", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:35 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "To calculate similarities between molecular structures, measures based on the maximum common subgraph are frequently applied. For the comparison of protein binding sites, these measures are not fully appropriate since graphs representing binding sites on a detailed atomic level tend to get very large. In combination with an NP-hard problem, a large graph leads to a computationally demanding task. Therefore, for the comparison of binding sites, a less detailed coarse graph model is used building upon so-called pseudocenters.
Consistently, a loss of structural data is caused since many atoms are discarded and no information about the shape of the binding site is considered. This is usually resolved by performing subsequent calculations based on additional information. These steps are usually quite expensive, making the whole approach very slow. The main drawback of a graph-based model solely based on pseudocenters, however, is the loss of information about the shape of the protein surface. In this study, we propose a novel and efficient modeling formalism that does not increase the size of the graph model compared to the original approach, but leads to graphs containing considerably more information assigned to the nodes. More specifically, additional descriptors considering surface characteristics are extracted from the local surface and attributed to the pseudocenters stored in Cavbase. These properties are evaluated as additional node labels, which lead to a gain of information and allow for much faster but still very accurate comparisons between different structures.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wu:2014:GWP, author = "Jian-Sheng Wu and Sheng-Jun Huang and Zhi-Hua Zhou", title = "Genome-wide protein function prediction through multi-instance multi-label learning", journal = j-TCBB, volume = "11", number = "5", pages = "891--902", month = sep, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2323058", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:35 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Automated annotation of protein function is challenging. As the number of sequenced genomes rapidly grows, the vast majority of proteins can only be annotated computationally. 
Nature often brings several domains together to form multidomain and multi-functional proteins with a vast number of possibilities, and each domain may fulfill its own function independently or in a concerted manner with its neighbors. Thus, it is evident that the protein function prediction problem is naturally and inherently Multi-Instance Multi-Label (MIML) learning tasks. Based on the state-of-the-art MIML algorithm MIMLNN, we propose a novel ensemble MIML learning framework EnMIMLNN and design three algorithms for this task by combining the advantage of three kinds of Hausdorff distance metrics. Experiments on seven real-world organisms covering the biological three-domain system, i.e., archaea, bacteria, and eukaryote, show that the EnMIMLNN algorithms are superior to most state-of-the-art MIML and Multi-Label learning algorithms.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Limpiti:2014:IIN, author = "Tulaya Limpiti and Chainarong Amornbunchornvej and Apichart Intarapanich and Anunchai Assawamakin and Sissades Tongsima", title = "{iNJclust}: iterative neighbor-joining tree clustering framework for inferring population structure", journal = j-TCBB, volume = "11", number = "5", pages = "903--914", month = sep, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2322372", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:35 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Understanding genetic differences among populations is one of the most important issues in population genetics. Genetic variations, e.g., single nucleotide polymorphisms, are used to characterize commonality and difference of individuals from various populations. 
This paper presents an efficient graph-based clustering framework which operates iteratively on the Neighbor-Joining (NJ) tree called the iNJclust algorithm. The framework uses well-known genetic measurements, namely the allele-sharing distance, the neighbor-joining tree, and the fixation index. The behavior of the fixation index is utilized in the algorithm's stopping criterion. The algorithm provides an estimated number of populations, individual assignments, and relationships between populations as outputs. The clustering result is reported in the form of a binary tree, whose terminal nodes represent the final inferred populations and the tree structure preserves the genetic relationships among them. The clustering performance and the robustness of the proposed algorithm are tested extensively using simulated and real data sets from bovine, sheep, and human populations. The result indicates that the number of populations within each data set is reasonably estimated, the individual assignment is robust, and the structure of the inferred population tree corresponds to the intrinsic relationships among populations within the data.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Liu:2014:MCP, author = "Xiaoqing Liu and Jun Wu and Haipeng Gong and Shengchun Deng and Zengyou He", title = "Mining conditional phosphorylation motifs", journal = j-TCBB, volume = "11", number = "5", pages = "915--927", month = sep, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2321400", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:35 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Phosphorylation motifs represent position-specific amino acid patterns around the phosphorylation sites in the set of phosphopeptides. 
Several algorithms have been proposed to uncover phosphorylation motifs, whereas the problem of efficiently discovering a set of significant motifs with sufficiently high coverage and non-redundancy still remains unsolved. Here we present a novel notion called conditional phosphorylation motifs. Through this new concept, the motifs whose over-expressiveness mainly benefits from its constituting parts can be filtered out effectively. To discover conditional phosphorylation motifs, we propose an algorithm called C-Motif for a non-redundant identification of significant phosphorylation motifs. C-Motif is implemented under the Apriori framework, and it tests the statistical significance together with the frequency of candidate motifs in a single stage. Experiments demonstrate that C-Motif outperforms some current algorithms such as MMFPh and Motif-All in terms of coverage and non-redundancy of the results and efficiency of the execution. The source code of C-Motif is available at: https://sourceforge.net/projects/cmotif/.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Kirkpatrick:2014:PPP, author = "Bonnie Kirkpatrick and Kristian Stevens", title = "Perfect phylogeny problems with missing values", journal = j-TCBB, volume = "11", number = "5", pages = "928--941", month = sep, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2316005", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:35 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The perfect phylogeny problem is of central importance to both evolutionary biology and population genetics.
Missing values are a common occurrence in both sequence and genotype data, but they make the problem of finding a perfect phylogeny NP-hard even for binary characters. We introduce new and efficient perfect phylogeny algorithms for broad classes of binary and multistate data with missing values. Specifically, we address binary missing data consistent with the rich data hypothesis (RDH) introduced by Halperin and Karp and give an efficient algorithm for enumerating phylogenies. This algorithm is useful for computing the probability of data with missing values under the coalescent model. In addition, we use the partition intersection (PI) graph and chordal graph theory to generalize the RDH to multi-state characters with missing values. For a bounded number of states, we provide a fixed parameter tractable algorithm for the perfect phylogeny problem with missing data. Utilizing the PI graph, we are able to show that under multiple biologically motivated models for character data, our generalized RDH holds with high probability, and we evaluate our results with extensive empirical analysis.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Horta:2014:SMC, author = "Danilo Horta and Ricardo J. G. B. 
Campello", title = "Similarity measures for comparing biclusterings", journal = j-TCBB, volume = "11", number = "5", pages = "942--954", month = sep, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2325016", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:35 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The comparison of ordinary partitions of a set of objects is well established in the clustering literature, which comprehends several studies on the analysis of the properties of similarity measures for comparing partitions. However, similarity measures for clusterings are not readily applicable to biclusterings, since each bicluster is a tuple of two sets (of rows and columns), whereas a cluster is only a single set (of rows). Some biclustering similarity measures have been defined as minor contributions in papers which primarily report on proposals and evaluation of biclustering algorithms or comparative analyses of biclustering algorithms. The consequence is that some desirable properties of such measures have been overlooked in the literature. We review 14 biclustering similarity measures. We define eight desirable properties of a biclustering measure, discuss their importance, and prove which properties each of the reviewed measures has. We show examples drawn and inspired from important studies in which several biclustering measures convey misleading evaluations due to the absence of one or more of the discussed properties. 
We also advocate the use of a more general comparison approach that is based on the idea of transforming the original problem of comparing biclusterings into an equivalent problem of comparing clustering partitions with overlapping clusters.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Suvorova:2014:SPC, author = "Yulia M. Suvorova and Maria A. Korotkova and Eugene V. Korotkov", title = "Study of the paired change points in bacterial genes", journal = j-TCBB, volume = "11", number = "5", pages = "955--964", month = sep, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2321154", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:35 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "It is known that nucleotide sequences are not totally homogeneous and this heterogeneity could not be due to random fluctuations only. Such heterogeneity poses a problem of making sequence segmentation into a set of homogeneous parts divided by the points called ``change points''. In this work we investigated a special case of change points --- paired change points (PCP). We used a well-known property of coding sequences --- triplet periodicity (TP). The sequences that we are especially interested in consist of three successive parts: the first and the last parts have similar TP while the middle part has different TP type. We aimed to find the genes with PCP and provide explanation for this phenomenon. We developed a mathematical method for the PCP detection based on the new measure of similarity between TP matrices. We investigated 66,936 bacterial genes from 17 bacterial genomes and revealed 2,700 genes with PCP and 6,459 genes with single change point (SCP). We developed a mathematical approach to visualize the PCP cases.
We suppose that PCP could be associated with double fusion or insertion events. The results of investigating the sequences with artificial insertions/fusions and distribution of TP inside the genome support the idea that the real number of genes formed by insertion/fusion events could be 5--7 times greater than the number of genes revealed in the present work.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhang:2014:SBN, author = "Hao Zhang and Xingyuan Wang and Xiaohui Lin", title = "Synchronization of {Boolean} networks with different update schemes", journal = j-TCBB, volume = "11", number = "5", pages = "965--972", month = sep, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2338313", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:35 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In this paper, the synchronizations of Boolean networks with different update schemes (synchronized Boolean networks and asynchronous Boolean networks) are investigated. All nodes in Boolean network are represented in terms of semi-tensor product. First, we give the concept of inner synchronization and observe that all nodes in a Boolean network are synchronized with each other. Second, we investigate the outer synchronization between a driving Boolean network and a corresponding response Boolean network. We provide not only the concept of traditional complete synchronization, but also the anti-synchronization and get the anti-synchronization in simulation. Third, we extend the outer synchronization to asynchronous Boolean network and get the complete synchronization between an asynchronous Boolean network and a response Boolean network.
Consequently, theorems for synchronization of Boolean networks and asynchronous Boolean networks are derived. Examples are provided to show the correctness of our theorems.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Li:2014:RSA, author = "Jing Li and Jun Hu and Matthew Newman and Kejun Liu and Huanying Ge", title = "{RNA}-seq analysis pipeline based on {Oshell} environment", journal = j-TCBB, volume = "11", number = "5", pages = "973--978", month = sep, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2321156", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:35 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Advances in transcriptome sequencing (RNA-Seq) have revolutionized the way to characterize and quantify transcripts. The breakthroughs in RNA-Seq technologies give rise to the ever-increasing volumes of data, making data processing the bottleneck of transcriptome research. It becomes crucial to develop an efficient analysis pipeline to automate RNA-Seq data analysis. Based on Oshell environment, we present here an ultra-fast and powerful RNA-Seq analysis pipeline for quality control, sequence alignment, variation detection, expression quantification and junction discovery. The pipeline runs on both Linux and Windows operating systems, with either stand-alone or cluster computing environment. Parallel computing is also supported for improved processing speed. 
Oshell is free for non-commercial use at http://omicsoft.com/oshell.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Huang:2014:SAI, author = "Yufei Huang and Yidong Chen and Xiaoning Qian", title = "Selected articles from the {2012 IEEE International Workshop on Genomic Signal Processing and Statistics (GENSIPS 2012)}", journal = j-TCBB, volume = "11", number = "6", pages = "981--983", month = nov, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2353218", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:39 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Gregory:2014:LFD, author = "Karl B. Gregory and Amin A. Momin and Kevin R. Coombes and Veerabhadran Baladandayuthapani", title = "Latent feature decompositions for integrative analysis of multi-platform genomic data", journal = j-TCBB, volume = "11", number = "6", pages = "984--994", month = nov, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2325035", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:39 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Increased availability of multi-platform genomics data on matched samples has sparked research efforts to discover how diverse molecular features interact both within and between platforms. In addition, simultaneous measurements of genetic and epigenetic characteristics illuminate the roles their complex relationships play in disease progression and outcomes. 
However, integrative methods for diverse genomics data are faced with the challenges of ultra-high dimensionality and the existence of complex interactions both within and between platforms. We propose a novel modeling framework for integrative analysis based on decompositions of the large number of platform-specific features into a smaller number of latent features. Subsequently we build a predictive model for clinical outcomes accounting for both within- and between-platform interactions based on Bayesian model averaging procedures. Principal components, partial least squares and non-negative matrix factorization as well as sparse counterparts of each are used to define the latent features, and the performance of these decompositions is compared both on real and simulated data. The latent feature interactions are shown to preserve interactions between the original features and not only aid prediction but also allow explicit selection of outcome-related features. The methods are motivated by and applied to a glioblastoma multiforme data set from The Cancer Genome Atlas to predict patient survival times integrating gene expression, microRNA, copy number and methylation data. For the glioblastoma data, we find a high concordance between our selected prognostic genes and genes with known associations with glioblastoma. In addition, our model discovers several relevant cross-platform interactions such as copy number variation associated gene dosing and epigenetic regulation through promoter methylation. On simulated data, we show that our proposed method successfully incorporates interactions within and between genomic platforms to aid accurate prediction and variable selection.
Our methods perform best when principal components are used to define the latent features.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Berlow:2014:IAA, author = "Noah Berlow and Saad Haider and Qian Wan and Mathew Geltzeiler and Lara E. Davis and Charles Keller and Ranadip Pal", title = "An integrated approach to anti-cancer drug sensitivity prediction", journal = j-TCBB, volume = "11", number = "6", pages = "995--1008", month = nov, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2321138", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:39 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A framework for design of personalized cancer therapy requires the ability to predict the sensitivity of a tumor to anticancer drugs. The predictive modeling of tumor sensitivity to anti-cancer drugs has primarily focused on generating functions that map gene expressions and genetic mutation profiles to drug sensitivity. In this paper, we present a new approach for drug sensitivity prediction and combination therapy design based on integrated functional and genomic characterizations. The modeling approach when applied to data from the Cancer Cell Line Encyclopedia shows a significant gain in prediction accuracy as compared to elastic net and random forest techniques based on genomic characterizations. Utilizing a Mouse Embryonal Rhabdomyosarcoma cell culture and a drug screen of 60 targeted drugs, we show that predictive modeling based on functional data alone can also produce high accuracy predictions. 
The framework also allows us to generate personalized tumor proliferation circuits to gain further insights on the individualized biological pathway.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Tian:2014:INB, author = "Ye Tian and Sean S. Wang and Zhen Zhang and Olga C. Rodriguez and Emanuel Petricoin and Ie-Ming Shih and Daniel Chan and Maria Avantaggiati and Guoqiang Yu and Shaozhen Ye and Robert Clarke and Chao Wang and Bai Zhang and Yue Wang and Chris Albanese", title = "Integration of network biology and imaging to study cancer phenotypes and responses", journal = j-TCBB, volume = "11", number = "6", pages = "1009--1019", month = nov, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2338304", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:39 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Ever growing ``omics'' data and continuously accumulated biological knowledge provide an unprecedented opportunity to identify molecular biomarkers and their interactions that are responsible for cancer phenotypes that can be accurately defined by clinical measurements such as in vivo imaging. Since signaling or regulatory networks are dynamic and context-specific, systematic efforts to characterize such structural alterations must effectively distinguish significant network rewiring from random background fluctuations. Here we introduced a novel integration of network biology and imaging to study cancer phenotypes and responses to treatments at the molecular systems level. 
Specifically, Differential Dependence Network (DDN) analysis was used to detect statistically significant topological rewiring in molecular networks between two phenotypic conditions, and in vivo Magnetic Resonance Imaging (MRI) was used to more accurately define phenotypic sample groups for such differential analysis. We applied DDN to analyze two distinct phenotypic groups of breast cancer and study how genomic instability affects the molecular network topologies in high-grade ovarian cancer. Further, FDA-approved arsenic trioxide (ATO) and the ND2-SmoA1 mouse model of Medulloblastoma (MB) were used to extend our analyses of combined MRI and Reverse Phase Protein Microarray (RPMA) data to assess tumor responses to ATO and to uncover the complexity of therapeutic molecular biology.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Lu:2014:LPC, author = "Meng Lu and Hye-Seung Lee and David Hadley and Jianhua Z. Huang and Xiaoning Qian", title = "Logistic principal component analysis for rare variants in gene-environment interaction analysis", journal = j-TCBB, volume = "11", number = "6", pages = "1020--1028", month = nov, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2322371", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:39 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The characteristics of low minor allele frequency (MAF) and weak individual effects make genome-wide association studies (GWAS) for rare variant single nucleotide polymorphisms (SNPs) more difficult when using conventional statistical methods. 
By aggregating the rare variant effects belonging to the same gene, collapsing is the most common way to enhance the detection of rare variant effects for association analyses with a given trait. In this paper, we propose a novel framework of MAF-based logistic principal component analysis (MLPCA) to derive aggregated statistics by explicitly modeling the correlation between rare variant SNP data, which is categorical. The derived aggregated statistics by MLPCA can then be tested as a surrogate variable in regression models to detect the gene-environment interaction from rare variants. In addition, MLPCA searches for the optimal linear combination from the best subset of rare variants according to MAF that has the maximum association with the given trait. We compared the power of our MLPCA-based methods with four existing collapsing methods in gene-environment interaction association analysis using both our simulation data set and Genetic Analysis Workshop 17 (GAW17) data. Our experimental results have demonstrated that MLPCA on two forms of genotype data representations achieves higher statistical power than those existing methods and can be further improved by introducing the appropriate sparsity penalty. 
The performance improvement by our MLPCA-based methods results from the derived aggregated statistics by explicitly modeling categorical SNP data and searching for the maximum associated subset of SNPs for collapsing, which helps better capture the combined effect from individual rare variants and the interaction with environmental factors.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Sajjadi:2014:NBM, author = "Seyed Javad Sajjadi and Xiaoning Qian and Bo Zeng and Amin Ahmadi Adl", title = "Network-based methods to identify highly discriminating subsets of biomarkers", journal = j-TCBB, volume = "11", number = "6", pages = "1029--1037", month = nov, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2325014", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:39 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Complex diseases such as various types of cancer and diabetes are conjectured to be triggered and influenced by a combination of genetic and environmental factors. To integrate potential effects from interplay among underlying candidate factors, we propose a new network-based framework to identify effective biomarkers by searching for groups of synergistic risk factors with high predictive power to disease outcome. An interaction network is constructed with node weights representing individual predictive power of candidate factors and edge weights capturing pairwise synergistic interactions among factors. We then formulate this network-based biomarker identification problem as a novel graph optimization model to search for multiple cliques with maximum overall weight, which we denote as the Maximum Weighted Multiple Clique Problem (MWMCP).
To achieve optimal or near optimal solutions, both an analytical algorithm based on column generation method and a fast heuristic for large-scale networks have been derived. Our algorithms for MWMCP have been implemented to analyze two biomedical data sets: a Type 1 Diabetes (T1D) data set from the Diabetes Prevention Trial-Type 1 (DPT-1) study, and a breast cancer genomics data set for metastasis prognosis. The results demonstrate that our network-based methods can identify important biomarkers with better prediction accuracy compared to the conventional feature selection that only considers individual effects.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Xu:2014:BSB, author = "Yanxun Xu and Xiaofeng Zheng and Yuan Yuan and Marcos R. Estecio and Jean-Pierre Issa and Peng Qiu and Yuan Ji and Shoudan Liang", title = "{BM}-{SNP}: a {Bayesian} model for {SNP} calling using high throughput sequencing data", journal = j-TCBB, volume = "11", number = "6", pages = "1038--1044", month = nov, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2321407", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:39 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A single-nucleotide polymorphism (SNP) is a sole base change in the DNA sequence and is the most common polymorphism. Detection and annotation of SNPs are among the central topics in biomedical research as SNPs are believed to play important roles on the manifestation of phenotypic events, such as disease susceptibility. To take full advantage of the next-generation sequencing (NGS) technology, we propose a Bayesian approach, BM-SNP, to identify SNPs based on the posterior inference using NGS data. 
In particular, BM-SNP computes the posterior probability of nucleotide variation at each covered genomic position using the contents and frequency of the mapped short reads. The position with a high posterior probability of nucleotide variation is flagged as a potential SNP. We apply BM-SNP to two cell-line NGS data, and the results show a high ratio of overlap ({$>$95} percent) with the dbSNP database. Compared with MAQ, BM-SNP identifies more SNPs that are in dbSNP, with higher quality. The SNPs that are called only by BM-SNP but not in dbSNP may serve as new discoveries. The proposed BM-SNP method integrates information from multiple aspects of NGS data, and therefore achieves high detection power. BM-SNP is fast, capable of processing whole genome data at 20-fold average coverage in a short amount of time.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Qiu:2014:UHD, author = "Peng Qiu", title = "Unfold high-dimensional clouds for exhaustive gating of flow cytometry data", journal = j-TCBB, volume = "11", number = "6", pages = "1045--1051", month = nov, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2321403", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:39 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Flow cytometry is able to measure the expressions of multiple proteins simultaneously at the single-cell level. A flow cytometry experiment on one biological sample provides measurements of several protein markers on or inside a large number of individual cells in that sample. Analysis of such data often aims to identify subpopulations of cells with distinct phenotypes. 
Currently, the most widely used analytical approach in the flow cytometry community is manual gating on a sequence of nested biaxial plots, which is highly subjective, labor intensive, and not exhaustive. To address those issues, a number of methods have been developed to automate the gating analysis by clustering algorithms. However, completely removing the subjectivity can be quite challenging. This paper describes an alternative approach. Instead of automating the analysis, we develop novel visualizations to facilitate manual gating. The proposed method views single-cell data of one biological sample as a high-dimensional point cloud of cells, derives the skeleton of the cloud, and unfolds the skeleton to generate 2D visualizations. We demonstrate the utility of the proposed visualization using real data, and provide quantitative comparison to visualizations generated from principal component analysis and multidimensional scaling.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Bitar:2014:BPC, author = "Main{\'a} Bitar and Gl{\'o}ria Regina Franco", title = "A basic protein comparative three-dimensional modeling methodological workflow theory and practice", journal = j-TCBB, volume = "11", number = "6", pages = "1052--1065", month = nov, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2325018", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:39 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "When working with proteins and studying its properties, it is crucial to have access to the three-dimensional structure of the molecule. 
If experimentally solved structures are not available, comparative modeling techniques can be used to generate useful protein models to subsidize structure-based research projects. In recent years, with Bioinformatics becoming the basis for the study of protein structures, there is a crescent need for the exposure of details about the algorithms behind the software and servers, as well as a need for protocols to guide in silico predictive experiments. In this article, we explore different steps of the comparative modeling technique, such as template identification, sequence alignment, generation of candidate structures and quality assessment, its peculiarities and theoretical description. We then present a practical step-by-step workflow, to support the Biologist on the in silico generation of protein structures. Finally, we explore further steps on comparative modeling, presenting perspectives to the study of protein structures through Bioinformatics. We trust that this is a thorough guide for beginners that wish to work on the comparative modeling of proteins.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhan:2014:PEM, author = "Choujun Zhan and Wuchao Situ and Lam Fat Yeung and Peter Wai-Ming Tsang and Genke Yang", title = "A parameter estimation method for biological systems modelled by {ODE\slash DDE} models using spline approximation and differential evolution algorithm", journal = j-TCBB, volume = "11", number = "6", pages = "1066--1076", month = nov, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2322360", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:39 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The inverse problem of identifying unknown parameters of known structure 
dynamical biological systems, which are modelled by ordinary differential equations or delay differential equations, from experimental data is treated in this paper. A two stage approach is adopted: first, combine spline theory and Nonlinear Programming (NLP), the parameter estimation problem is formulated as an optimization problem with only algebraic constraints; then, a new differential evolution (DE) algorithm is proposed to find a feasible solution. The approach is designed to handle problem of realistic size with noisy observation data. Three cases are studied to evaluate the performance of the proposed algorithm: two are based on benchmark models with priori-determined structure and parameters; the other one is a particular biological system with unknown model structure. In the last case, only a set of observation data available and in this case a nominal model is adopted for the identification. All the test systems were successfully identified by using a reasonable amount of experimental data within an acceptable computation time. Experimental evaluation reveals that the proposed method is capable of fast estimation on the unknown parameters with good precision.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Shiraishi:2014:EVA, author = "Fumihide Shiraishi and Erika Yoshida and Eberhard O. 
Voit", title = "An efficient and very accurate method for calculating steady-state sensitivities in metabolic reaction systems", journal = j-TCBB, volume = "11", number = "6", pages = "1077--1086", month = nov, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2338311", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:39 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Stability and sensitivity analyses of biological systems require the ad hoc writing of computer code, which is highly dependent on the particular model and burdensome for large systems. We propose a very accurate strategy to overcome this challenge. Its core concept is the conversion of the model into the format of biochemical systems theory (BST), which greatly facilitates the computation of sensitivities. First, the steady state of interest is determined by integrating the model equations toward the steady state and then using a Newton--Raphson method to fine-tune the result. The second step of conversion into the BST format requires several instances of numerical differentiation. The accuracy of this task is ensured by the use of a complex-variable Taylor scheme for all differentiation steps. The proposed strategy is implemented in a new software program, COSMOS, which automates the stability and sensitivity analysis of essentially arbitrary ODE models in a quick, yet highly accurate manner. The methods underlying the process are theoretically analyzed and illustrated with four representative examples: a simple metabolic reaction model; a model of aspartate-derived amino acid biosynthesis; a TCA-cycle model; and a modified TCA-cycle model. 
COSMOS has been deposited to https://github.com/BioprocessdesignLab/COSMOS.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Charuvaka:2014:CPS, author = "Anveshi Charuvaka and Huzefa Rangwala", title = "Classifying protein sequences using regularized multi-task learning", journal = j-TCBB, volume = "11", number = "6", pages = "1087--1098", month = nov, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2338303", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:39 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Classification problems in which several learning tasks are organized hierarchically pose a special challenge because the hierarchical structure of the problems needs to be considered. Multi-task learning (MTL) provides a framework for dealing with such interrelated learning tasks. When two different hierarchical sources organize similar information, in principle, this combined knowledge can be exploited to further improve classification performance. We have studied this problem in the context of protein structure classification by integrating the learning process for two hierarchical protein structure classification databases, SCOP and CATH. Our goal is to accurately predict whether a given protein belongs to a particular class in these hierarchies using only the amino acid sequences. We have utilized the recent developments in multi-task learning to solve the interrelated classification problems. We have also evaluated how the various relationships between tasks affect the classification performance.
Our evaluations show that learning schemes in which both the classification databases are used outperform the schemes which utilize only one of them.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{David:2014:CEF, author = "Laszlo David and Alexander Bockmayr", title = "Computing elementary flux modes involving a set of target reactions", journal = j-TCBB, volume = "11", number = "6", pages = "1099--1107", month = nov, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2343964", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:39 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Elementary flux mode (EM) computation is an important tool in the constraint-based analysis of genome-scale metabolic networks. Due to the combinatorial complexity of these networks, as well as the advances in the level of detail to which they can be reconstructed, an exhaustive enumeration of all EMs is often not practical. Therefore, in recent years interest has shifted towards searching EMs with specific properties. We present a novel method that allows computing EMs containing a given set of target reactions. This generalizes previous algorithms where the set of target reactions consists of a single reaction. In the one-reaction case, our method compares favorably to the previous approaches. In addition, we present several applications of our algorithm for computing EMs containing two target reactions in genome-scale metabolic networks. 
A software tool implementing the algorithms described in this paper is available at https://sourceforge.net/projects/caefm.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Rizvi:2014:DRO, author = "Ahsan Z. Rizvi and C. Bhattacharya", title = "Detection of replication origin sites in herpesvirus genomes by clustering and scoring of palindromes with quadratic entropy measures", journal = j-TCBB, volume = "11", number = "6", pages = "1108--1118", month = nov, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2330622", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:39 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Replication in herpesvirus genomes is a major concern of public health as they multiply rapidly during the lytic phase of infection that cause maximum damage to the host cells. Earlier research has established that sites of replication origin are dominated by high concentration of rare palindrome sequences of DNA. Computational methods are devised based on scoring to determine the concentration of palindromes. In this paper, we propose both extraction and localization of rare palindromes in an automated manner. Discrete Cosine Transform (DCT-II), a widely recognized image compression algorithm is utilized here to extract palindromic sequences based on their reverse complementary symmetry property of existence. We formulate a novel approach to localize the rare palindrome clusters by devising a Minimum Quadratic Entropy (MQE) measure based on the Renyi's Quadratic Entropy (RQE) function.
Experimental results over a large number of herpesvirus genomes show that the RQE based scoring of rare palindromes have higher order of sensitivity, and lesser false alarm in detecting concentration of rare palindromes and thereby sites of replication origin.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Taha:2014:DSR, author = "Kamal Taha", title = "Determining semantically related significant genes", journal = j-TCBB, volume = "11", number = "6", pages = "1119--1130", month = nov, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2344668", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:39 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "GO relation embodies some aspects of existence dependency. If GO term x is existence-dependent on GO term y, the presence of y implies the presence of x. Therefore, the genes annotated with the function of the GO term y are usually functionally and semantically related to the genes annotated with the function of the GO term x. A large number of gene set enrichment analysis methods have been developed in recent years for analyzing gene sets enrichment. However, most of these methods overlook the structural dependencies between GO terms in GO graph by not considering the concept of existence dependency. We propose in this paper a biological search engine called RSGSearch that identifies enriched sets of genes annotated with different functions using the concept of existence dependency. We observe that GO term x cannot be existence-dependent on GO term y, if x and y have the same specificity (biological characteristics). 
After encoding into a numeric format the contributions of GO terms annotating target genes to the semantics of their lowest common ancestors (LCAs), RSGSearch uses microarray experiment to identify the most significant LCA that annotates the result genes. We evaluated RSGSearch experimentally and compared it with five gene set enrichment systems. Results showed marked improvement.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Rathore:2014:GGE, author = "Saima Rathore and Mutawarra Hussain and Asifullah Khan", title = "{GECC}: gene expression based ensemble classification of colon samples", journal = j-TCBB, volume = "11", number = "6", pages = "1131--1145", month = nov, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2344655", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:39 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Gene expression deviates from its normal composition in case a patient has cancer. This variation can be used as an effective tool to find cancer. In this study, we propose a novel gene expressions based colon classification scheme (GECC) that exploits the variations in gene expressions for classifying colon gene samples into normal and malignant classes. Novelty of GECC is in two complementary ways. First, to cater overwhelmingly larger size of gene based data sets, various feature extraction strategies, like, chi-square, F-Score, principal component analysis (PCA) and minimum redundancy and maximum relevancy (mRMR) have been employed, which select discriminative genes amongst a set of genes. Second, a majority voting based ensemble of support vector machine (SVM) has been proposed to classify the given gene based samples. 
Previously, individual SVM models have been used for colon classification, however, their performance is limited. In this research study, we propose an SVM-ensemble based new approach for gene based classification of colon, wherein the individual SVM models are constructed through the learning of different SVM kernels, like, linear, polynomial, radial basis function (RBF), and sigmoid. The predicted results of individual models are combined through majority voting. In this way, the combined decision space becomes more discriminative. The proposed technique has been tested on four colon, and several other binary-class gene expression data sets, and improved performance has been achieved compared to previously reported gene based colon cancer detection techniques. The computational time required for the training and testing of 208 $ \times $ 5,851 data set has been 591.01 and 0.019 s, respectively.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Liao:2014:GSU, author = "Bo Liao and Yan Jiang and Wei Liang and Wen Zhu and Lijun Cai and Zhi Cao", title = "Gene selection using locality sensitive {Laplacian} score", journal = j-TCBB, volume = "11", number = "6", pages = "1146--1156", month = nov, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2328334", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:39 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Gene selection based on microarray data, is highly important for classifying tumors accurately. Existing gene selection schemes are mainly based on ranking statistics. From manifold learning standpoint, local geometrical structure is more essential to characterize features compared with global information. 
In this study, we propose a supervised gene selection method called locality sensitive Laplacian score (LSLS), which incorporates discriminative information into local geometrical structure, by minimizing local within-class information and maximizing local between-class information simultaneously. In addition, variance information is considered in our algorithm framework. Eventually, to find more superior gene subsets, which is significant for biomarker discovery, a two-stage feature selection method that combines the LSLS and wrapper method (sequential forward selection or sequential backward selection) is presented. Experimental results of six publicly available gene expression profile data sets demonstrate the effectiveness of the proposed approach compared with a number of state-of-the-art gene selection methods.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Stock:2014:IFR, author = "Michiel Stock and Thomas Fober and Eyke H{\"u}llermeier and Serghei Glinca and Gerhard Klebe and Tapio Pahikkala and Antti Airola and Bernard {De Baets} and Willem Waegeman", title = "Identification of functionally related enzymes by learning-to-rank methods", journal = j-TCBB, volume = "11", number = "6", pages = "1157--1169", month = nov, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2338308", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:39 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Enzyme sequences and structures are routinely used in the biological sciences as queries to search for functionally related enzymes in online databases. To this end, one usually departs from some notion of similarity, comparing two enzymes by looking for correspondences in their sequences, structures or surfaces. 
For a given query, the search operation results in a ranking of the enzymes in the database, from very similar to dissimilar enzymes, while information about the biological function of annotated database enzymes is ignored. In this work, we show that rankings of that kind can be substantially improved by applying kernel-based learning algorithms. This approach enables the detection of statistical dependencies between similarities of the active cleft and the biological function of annotated enzymes. This is in contrast to search-based approaches, which do not take annotated training data into account. Similarity measures based on the active cleft are known to outperform sequence-based or structure-based measures under certain conditions. We consider the Enzyme Commission (EC) classification hierarchy for obtaining annotated enzymes during the training phase. The results of a set of sizeable experiments indicate a consistent and significant improvement for a set of similarity measures that exploit information about small cavities in the surface of enzymes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Mukhopadhyay:2014:INR, author = "Anirban Mukhopadhyay and Monalisa Mandal", title = "Identifying non-redundant gene markers from microarray data: a multiobjective variable length {PSO}-based approach", journal = j-TCBB, volume = "11", number = "6", pages = "1170--1183", month = nov, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2323065", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:39 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Identifying relevant genes which are responsible for various types of cancer is an important problem. 
In this context, important genes refer to the marker genes which change their expression level in correlation with the risk or progression of a disease, or with the susceptibility of the disease to a given treatment. Gene expression profiling by microarray technology has been successfully applied to classification and diagnostic prediction of cancers. However, extracting these marker genes from a huge set of genes contained by the microarray data set is a major problem. Most of the existing methods for identifying marker genes find a set of genes which may be redundant in nature. Motivated by this, a multiobjective optimization method has been proposed which can find a small set of non-redundant disease related genes providing high sensitivity and specificity simultaneously. In this article, the optimization problem has been modeled as a multiobjective one which is based on the framework of variable length particle swarm optimization. Using some real-life data sets, the performance of the proposed algorithm has been compared with that of other state-of-the-art techniques.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zarai:2014:MPT, author = "Yoram Zarai and Michael Margaliot and Tamir Tuller", title = "Maximizing protein translation rate in the ribosome flow model: the homogeneous case", journal = j-TCBB, volume = "11", number = "6", pages = "1184--1195", month = nov, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2330621", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:39 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Gene translation is the process in which intracellular macro-molecules, called ribosomes, decode genetic information in the mRNA chain into the corresponding proteins. 
Gene translation includes several steps. During the elongation step, ribosomes move along the mRNA in a sequential manner and link amino-acids together in the corresponding order to produce the proteins. The homogeneous ribosome flow model (HRFM) is a deterministic computational model for translation-elongation under the assumption of constant elongation rates along the mRNA chain. The HRFM is described by a set of n first-order nonlinear ordinary differential equations, where n represents the number of sites along the mRNA chain. The HRFM also includes two positive parameters: ribosomal initiation rate and the (constant) elongation rate. In this paper, we show that the steady-state translation rate in the HRFM is a concave function of its parameters. This means that the problem of determining the parameter values that maximize the translation rate is relatively simple. Our results may contribute to a better understanding of the mechanisms and evolution of translation-elongation. We demonstrate this by using the theoretical results to estimate the initiation rate in M. musculus embryonic stem cell. The underlying assumption is that evolution optimized the translation mechanism. For the infinite-dimensional HRFM, we derive a closed-form solution to the problem of determining the initiation and transition rates that maximize the protein translation rate. We show that these expressions provide good approximations for the optimal values in the n-dimensional HRFM already for relatively small values of n.
These results may have applications for synthetic biology where an important problem is to re-engineer genomic systems in order to maximize the protein production rate.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Kalathiya:2014:MME, author = "Umesh Kalathiya and Monikaben Padariya and Maciej Baginski", title = "Molecular modeling and evaluation of novel dibenzopyrrole derivatives as telomerase inhibitors and potential drug for cancer therapy", journal = j-TCBB, volume = "11", number = "6", pages = "1196--1207", month = nov, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2326860", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:39 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "During previous years, many studies on synthesis, as well as on anti-tumor, anti-inflammatory and anti-bacterial activities of the pyrazole derivatives have been described. Certain pyrazole derivatives exhibit important pharmacological activities and have proved to be useful template in drug research. Considering importance of pyrazole template, in current work the series of novel inhibitors were designed by replacing central ring of acridine with pyrazole ring. These heterocyclic compounds were proposed as a new potential base for telomerase inhibitors. Obtained dibenzopyrrole structure was used as a novel scaffold structure and extension of inhibitors was done by different functional groups. Docking of newly designed compounds in the telomerase active site (telomerase catalytic subunit TERT) was carried out. All dibenzopyrrole derivatives were evaluated by three docking programs: CDOCKER, Ligandfit docking (Scoring Functions) and AutoDock. 
Compound C\_9g, C\_9k and C\_9l performed best in comparison to all designed inhibitors during the docking in all methods and in interaction analysis. Introduction of pyrazole and extension of dibenzopyrrole in compounds confirm that such compound may act as potential telomerase inhibitors.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Mirzal:2014:NTR, author = "Andri Mirzal", title = "Nonparametric {Tikhonov} regularized {NMF} and its application in cancer clustering", journal = j-TCBB, volume = "11", number = "6", pages = "1208--1217", month = nov, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2328342", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:39 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The Tikhonov regularized nonnegative matrix factorization (TNMF) is an NMF objective function that enforces smoothness on the computed solutions, and has been successfully applied to many problem domains including text mining, spectral data analysis, and cancer clustering. There is, however, an issue that is still insufficiently addressed in the development of TNMF algorithms, i.e., how to develop mechanisms that can learn the regularization parameters directly from the data sets. The common approach is to use fixed values based on a priori knowledge about the problem domains. However, from the linear inverse problems study it is known that the quality of the solutions of the Tikhonov regularized least square problems depends heavily on the choosing of appropriate regularization parameters. Since least squares are the building blocks of the NMF, it can be expected that similar situation also applies to the NMF. 
In this paper, we propose two formulas to automatically learn the regularization parameters from the data set based on the L-curve approach. We also develop a convergent algorithm for the TNMF based on the additive update rules. Finally, we demonstrate the use of the proposed algorithm in cancer clustering tasks.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Kasarapu:2014:RPF, author = "Parthan Kasarapu and Maria {Garcia De La Banda} and Arun S. Konagurthu", title = "On representing protein folding patterns using non-linear parametric curves", journal = j-TCBB, volume = "11", number = "6", pages = "1218--1228", month = nov, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2338319", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:39 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Proteins fold into complex three-dimensional shapes. Simplified representations of their shapes are central to rationalise, compare, classify, and interpret protein structures. Traditional methods to abstract protein folding patterns rely on representing their standard secondary structural elements (helices and strands of sheet) using line segments. This results in ignoring a significant proportion of structural information. The motivation of this research is to derive mathematically rigorous and biologically meaningful abstractions of protein folding patterns that maximize the economy of structural description and minimize the loss of structural information. We report on a novel method to describe a protein as a non-overlapping set of parametric three dimensional curves of varying length and complexity. 
Our approach to this problem is supported by information theory and uses the statistical framework of minimum message length (MML) inference. We demonstrate the effectiveness of our non-linear abstraction to support efficient and effective comparison of protein folding patterns on a large scale.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Disanto:2014:NRS, author = "Filippo Disanto and Noah A. Rosenberg", title = "On the number of ranked species trees producing anomalous ranked gene trees", journal = j-TCBB, volume = "11", number = "6", pages = "1229--1238", month = nov, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2343977", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:39 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Analysis of probability distributions conditional on species trees has demonstrated the existence of anomalous ranked gene trees (ARGTs), ranked gene trees that are more probable than the ranked gene tree that accords with the ranked species tree. Here, to improve the characterization of ARGTs, we study enumerative and probabilistic properties of two classes of ranked labeled species trees, focusing on the presence or avoidance of certain subtree patterns associated with the production of ARGTs. We provide exact enumerations and asymptotic estimates for cardinalities of these sets of trees, showing that as the number of species increases without bound, the fraction of all ranked labeled species trees that are ARGT-producing approaches 1. 
This result extends beyond earlier existence results to provide a probabilistic claim about the frequency of ARGTs.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ahmed:2014:SSC, author = "Hasin Afzal Ahmed and Priyakshi Mahanta and Dhruba Kumar Bhattacharyya and Jugal Kumar Kalita", title = "Shifting-and-scaling correlation based biclustering algorithm", journal = j-TCBB, volume = "11", number = "6", pages = "1239--1252", month = nov, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2323054", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:39 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The existence of various types of correlations among the expressions of a group of biologically significant genes poses challenges in developing effective methods of gene expression data analysis. The initial focus of computational biologists was to work with only absolute and shifting correlations. However, researchers have found that the ability to handle shifting-and-scaling correlation enables them to extract more biologically relevant and interesting patterns from gene microarray data. In this paper, we introduce an effective shifting-and-scaling correlation measure named Shifting and Scaling Similarity (SSSim), which can detect highly correlated gene pairs in any gene expression data. We also introduce a technique named Intensive Correlation Search (ICS) biclustering algorithm, which uses SSSim to extract biologically significant biclusters from a gene expression data set. 
The technique performs satisfactorily with a number of benchmarked gene expression data sets when evaluated in terms of functional categories in Gene Ontology database.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Kobayashi:2014:ISB, author = "Koichi Kobayashi and Kunihiko Hiraishi", title = "{ILP\slash SMT}-based method for design of {Boolean} networks based on singleton attractors", journal = j-TCBB, volume = "11", number = "6", pages = "1253--1259", month = nov, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2325011", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:39 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Attractors in gene regulatory networks represent cell types or states of cells. In system biology and synthetic biology, it is important to generate gene regulatory networks with desired attractors. In this paper, we focus on a singleton attractor, which is also called a fixed point. Using a Boolean network (BN) model, we consider the problem of finding Boolean functions such that the system has desired singleton attractors and has no undesired singleton attractors. To solve this problem, we propose a matrix-based representation of BNs. Using this representation, the problem of finding Boolean functions can be rewritten as an Integer Linear Programming (ILP) problem and a Satisfiability Modulo Theories (SMT) problem. Furthermore, the effectiveness of the proposed method is shown by a numerical example on a WNT5A network, which is related to melanoma. 
The proposed method provides us a basic method for design of gene regulatory networks.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Loohuis:2014:IDD, author = "Loes {Olde Loohuis} and Andreas Witzel and Bud Mishra", title = "Improving detection of driver genes: power-law null model of copy number variation in cancer", journal = j-TCBB, volume = "11", number = "6", pages = "1260--1263", month = nov, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2351805", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:39 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In this paper, we study Copy Number Variation (CNV) data. The underlying process generating CNV segments is generally assumed to be memory-less, giving rise to an exponential distribution of segment lengths. In this paper, we provide evidence from cancer patient data, which suggests that this generative model is too simplistic, and that segment lengths follow a power-law distribution instead. We conjecture a simple preferential attachment generative model that provides the basis for the observed power-law distribution.
We then show how an existing statistical method for detecting cancer driver genes can be improved by incorporating the power-law distribution in the null model.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wang:2014:OMI, author = "Haiying Wang and Huiru Zheng", title = "Organized modularity in the interactome: evidence from the analysis of dynamic organization in the cell cycle", journal = j-TCBB, volume = "11", number = "6", pages = "1264--1270", month = nov, year = "2014", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2318715", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Feb 14 10:45:39 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The organization of global protein interaction networks (PINs) has been extensively studied and heatedly debated. We revisited this issue in the context of the analysis of dynamic organization of a PIN in the yeast cell cycle. Statistically significant bimodality was observed when analyzing the distribution of the differences in expression peak between periodically expressed partners. A close look at their behavior revealed that date and party hubs derived from this analysis have some distinct features. There are no significant differences between them in terms of protein essentiality, expression correlation and semantic similarity derived from gene ontology (GO) biological process hierarchy. However, date hubs exhibit significantly greater values than party hubs in terms of semantic similarity derived from both GO molecular function and cellular component hierarchies. 
Relating to three-dimensional structures, we found that both single and multi-interface proteins could become date hubs coordinating multiple functions performed at different times while party hubs are mainly multi-interface proteins. Furthermore, we constructed and analyzed a PPI network specific to the human cell cycle and highlighted that the dynamic organization in human interactome is far more complex than the dichotomy of hubs observed in the yeast cell cycle.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Xu:2015:EEC, author = "Ying Xu", title = "Editorial from the {Editor-in-Chief}", journal = j-TCBB, volume = "12", number = "1", pages = "1--1", month = jan, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2394592", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Aluru:2015:GEI, author = "Srinivas Aluru and Donna K. Slonim", title = "{Guest Editors}' introduction: selected papers from {ACM-BCB 2013}", journal = j-TCBB, volume = "12", number = "1", pages = "2--3", month = jan, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2389551", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Daniels:2015:MRH, author = "Noah M.
Daniels and Andrew Gallant and Norman Ramsey and Lenore J. Cowen", title = "{MRFy}: remote homology detection for beta-structural proteins using {Markov} random fields and stochastic search", journal = j-TCBB, volume = "12", number = "1", pages = "4--16", month = jan, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2344682", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We introduce MRFy, a tool for protein remote homology detection that captures beta-strand dependencies in the Markov random field. Over a set of 11 SCOP beta-structural superfamilies, MRFy shows a 14 percent improvement in mean Area Under the Curve for the motif recognition problem as compared to HMMER, 25 percent improvement as compared to RAPTOR, 14 percent improvement as compared to HHPred, and a 18 percent improvement as compared to CNFPred and RaptorX. MRFy was implemented in the Haskell functional programming language, and parallelizes well on multi-core systems. MRFy is available, as source code as well as an executable, from http://mrfy.cs.tufts.edu/.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Torii:2015:RPG, author = "Manabu Torii and Cecilia N. Arighi and Gang Li and Qinghua Wang and Cathy H. Wu and K. 
Vijay-Shanker", title = "{RLIMS-P 2.0}: a generalizable rule-based information extraction system for literature mining of protein phosphorylation information", journal = j-TCBB, volume = "12", number = "1", pages = "17--29", month = jan, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2372765", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We introduce RLIMS-P version 2.0, an enhanced rule-based information extraction (IE) system for mining kinase, substrate, and phosphorylation site information from scientific literature. Consisting of natural language processing and IE modules, the system has integrated several new features, including the capability of processing full-text articles and generalizability towards different post-translational modifications (PTMs). To evaluate the system, sets of abstracts and full-text articles, containing a variety of textual expressions, were annotated. On the abstract corpus, the system achieved F-scores of 0.91, 0.92, and 0.95 for kinases, substrates, and sites, respectively. The corresponding scores on the full-text corpus were 0.88, 0.91, and 0.92. It was additionally evaluated on the corpus of the 2013 BioNLP-ST GE task, and achieved an F-score of 0.87 for the phosphorylation core task, improving upon the results previously reported on the corpus. Full-scale processing of all abstracts in MEDLINE and all articles in PubMed Central Open Access Subset has demonstrated scalability for mining rich information in literature, enabling its adoption for biocuration and for knowledge discovery. The new system is generalizable and it will be adapted to tackle other major PTM types. 
RLIMS-P 2.0 online system is available online (http://proteininformationresource.org/rlimsp/) and the developed corpora are available from iProLINK (http://proteininformationresource.org/iprolink/).", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wang:2015:PDC, author = "Kun Wang and Avinash Das and Zheng-Mei Xiong and Kan Cao and Sridhar Hannenhalli", title = "Phenotype-dependent coexpression gene clusters: application to normal and premature ageing", journal = j-TCBB, volume = "12", number = "1", pages = "30--39", month = jan, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2359446", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Hutchinson Gilford progeria syndrome (HGPS) is a rare genetic disease with symptoms of aging at a very early age. Its molecular basis is not entirely clear, although profound gene expression changes have been reported, and there are some known and other presumed overlaps with normal aging process. Identification of genes with aging- or HGPS-associated expression changes is thus an important problem. However, standard regression approaches are currently unsuitable for this task due to limited sample sizes, thus motivating development of alternative approaches. Here, we report a novel iterative multiple regression approach that leverages co-expressed gene clusters to identify gene clusters whose expression co-varies with age and/or HGPS. We have applied our approach to novel RNA-seq profiles in fibroblast cell cultures at three different cellular ages, both from HGPS patients and normal samples.
After establishing the robustness of our approach, we perform a comparative investigation of biological processes underlying normal aging and HGPS. Our results recapitulate previously known processes underlying aging as well as suggest numerous unique processes underlying aging and HGPS. The approach could also be useful in detecting phenotype-dependent co-expression gene clusters in other contexts with limited sample sizes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Faisal:2015:GNA, author = "Fazle Elahi Faisal and Han Zhao and Tijana Milenkovi{\'c}", title = "Global network alignment in the context of aging", journal = j-TCBB, volume = "12", number = "1", pages = "40--52", month = jan, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2326862", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Analogous to sequence alignment, network alignment (NA) can be used to transfer biological knowledge across species between conserved network regions. NA faces two algorithmic challenges: (1) Which cost function to use to capture ``similarities'' between nodes in different networks? (2) Which alignment strategy to use to rapidly identify ``high-scoring'' alignments from all possible alignments? We ``break down'' existing state-of-the-art methods that use both different cost functions and different alignment strategies to evaluate each combination of their cost functions and alignment strategies. We find that a combination of the cost function of one method and the alignment strategy of another method beats the existing methods. Hence, we propose this combination as a novel superior NA method. 
Then, since human aging is hard to study experimentally due to long lifespan, we use NA to transfer aging-related knowledge from well annotated model species to poorly annotated human. By doing so, we produce novel human aging-related knowledge, which complements currently available knowledge about aging that has been obtained mainly by sequence alignment. We demonstrate significant similarity between topological and functional properties of our novel predictions and those of known aging-related genes. We are the first to use NA to learn more about aging.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Gabr:2015:RAP, author = "Haitham Gabr and Andrei Todor and Alin Dobra and Tamer Kahveci", title = "Reachability analysis in probabilistic biological networks", journal = j-TCBB, volume = "12", number = "1", pages = "53--66", month = jan, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2343967", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Extra-cellular molecules trigger a response inside the cell by initiating a signal at special membrane receptors (i.e., sources), which is then transmitted to reporters (i.e., targets) through various chains of interactions among proteins. Understanding whether such a signal can reach from membrane receptors to reporters is essential in studying the cell response to extracellular events. This problem is drastically complicated due to the unreliability of the interaction data. 
In this paper, we develop a novel method, called PReach (Probabilistic Reachability), that precisely computes the probability that a signal can reach from a given collection of receptors to a given collection of reporters when the underlying signaling network is uncertain. This is a very difficult computational problem with no known polynomial-time solution. PReach represents each uncertain interaction as a bi-variate polynomial. It transforms the reachability problem to a polynomial multiplication problem. We introduce novel polynomial collapsing operators that associate polynomial terms with possible paths between sources and targets as well as the cuts that separate sources from targets. These operators significantly shrink the number of polynomial terms and thus the running time. PReach has much better time complexity than the recent solutions for this problem. Our experimental results on real data sets demonstrate that this improvement leads to orders of magnitude of reduction in the running time over the most recent methods. 
Availability: All the data sets used, the software implemented and the alignments found in this paper are available at http://bioinformatics.cise.ufl.edu/PReach/.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ye:2015:GAM, author = "Yongtao Ye and David Wai-lok Cheung and Yadong Wang and Siu-Ming Yiu and Qing Zhang and Tak-Wah Lam and Hing-Fung Ting", title = "{GLProbs}: aligning multiple sequences adaptively", journal = j-TCBB, volume = "12", number = "1", pages = "67--78", month = jan, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2316820", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This paper introduces a simple and effective approach to improve the accuracy of multiple sequence alignment. We use a natural measure to estimate the similarity of the input sequences, and based on this measure, we align the input sequences differently. For example, for inputs with high similarity, we consider the whole sequences and align them globally, while for those with moderately low similarity, we may ignore the flank regions and align them locally. To test the effectiveness of this approach, we have implemented a multiple sequence alignment tool called GLProbs and compared its performance with about one dozen leading alignment tools on three benchmark alignment databases, and GLProbs's alignments have the best scores in almost all testings. 
We have also evaluated the practicability of the alignments of GLProbs by applying the tool to three biological applications, namely phylogenetic trees construction, protein secondary structure prediction and the detection of high risk members for cervical cancer in the HPV-E6 family, and the results are very encouraging.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Li:2015:PIR, author = "Hui Li and Xiaoyi Li and Murali Ramanathan and Aidong Zhang", title = "Prediction and informative risk factor selection of bone diseases", journal = j-TCBB, volume = "12", number = "1", pages = "79--91", month = jan, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2330579", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "With the booming of healthcare industry and the overwhelming amount of electronic health records (EHRs) shared by healthcare institutions and practitioners, we take advantage of EHR data to develop an effective disease risk management model that not only models the progression of the disease, but also predicts the risk of the disease for early disease control or prevention. Existing models for answering these questions usually fall into two categories: the expert knowledge based model or the handcrafted feature set based model. To fully utilize the whole EHR data, we will build a framework to construct an integrated representation of features from all available risk factors in the EHR data and use these integrated features to effectively predict osteoporosis and bone fractures. We will also develop a framework for informative risk factor selection of bone diseases. 
A pair of models for two contrast cohorts (e.g., diseased patients versus non-diseased patients) will be established to discriminate their characteristics and find the most informative risk factors. Several empirical results on a real bone disease data set show that the proposed framework can successfully predict bone diseases and select informative risk factors that are beneficial and useful to guide clinical decisions.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{DeJesus:2015:CUM, author = "Michael A. DeJesus and Thomas R. Ioerger", title = "Capturing uncertainty by modeling local transposon insertion frequencies improves discrimination of essential genes", journal = j-TCBB, volume = "12", number = "1", pages = "92--102", month = jan, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2326857", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Transposon mutagenesis experiments enable the identification of essential genes in bacteria. Deep-sequencing of mutant libraries provides a large amount of high-resolution data on essentiality. Statistical methods developed to analyze this data have traditionally assumed that the probability of observing a transposon insertion is the same across the genome. This assumption, however, is inconsistent with the observed insertion frequencies from transposon mutant libraries of M. tuberculosis. We propose a modified Binomial model of essentiality that can characterize the insertion probability of individual genes in which we allow local variation in the background insertion frequency in different non-essential regions of the genome. 
Using the Metropolis--Hastings algorithm, samples of the posterior insertion probabilities were obtained for each gene, and the probability of each gene being essential is estimated. We compared our predictions to those of previous methods and show that, by taking into consideration local insertion frequencies, our method is capable of making more conservative predictions that better match what is experimentally known about essential and non-essential genes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Spencer:2015:DLN, author = "Matt Spencer and Jesse Eickholt and Jianlin Cheng", title = "A deep learning network approach to ab initio protein secondary structure prediction", journal = j-TCBB, volume = "12", number = "1", pages = "103--112", month = jan, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2343960", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Ab initio protein secondary structure (SS) predictions are utilized to generate tertiary structure predictions, which are increasingly demanded due to the rapid discovery of proteins. Although recent developments have slightly exceeded previous methods of SS prediction, accuracy has stagnated around 80 percent and many wonder if prediction cannot be advanced beyond this ceiling. Disciplines that have traditionally employed neural networks are experimenting with novel deep learning techniques in attempts to stimulate progress. Since neural networks have historically played an important role in SS prediction, we wanted to determine whether deep learning could contribute to the advancement of this field as well. 
We developed an SS predictor that makes use of the position-specific scoring matrix generated by PSI-BLAST and deep learning network architectures, which we call DNSS. Graphical processing units and CUDA software optimize the deep network architecture and efficiently train the deep networks. Optimal parameters for the training process were determined, and a workflow comprising three separately trained deep networks was constructed in order to make refined predictions. This deep learning network approach was used to predict SS for a fully independent test dataset of 198 proteins, achieving a Q3 accuracy of 80.7 percent and a Sov accuracy of 74.2 percent.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Liao:2015:HCM, author = "Bo Liao and Xiong Li and Lijun Cai and Zhi Cao and Haowen Chen", title = "A hierarchical clustering method of selecting kernel {SNP} to unify informative {SNP} and tag {SNP}", journal = j-TCBB, volume = "12", number = "1", pages = "113--122", month = jan, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2351797", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Various strategies can be used to select representative single nucleotide polymorphisms (SNPs) from a large number of SNPs, such as tag SNP for haplotype coverage and informative SNP for haplotype reconstruction, respectively. Representative SNPs are not only instrumental in reducing the cost of genotyping, but also serve an important function in narrowing the combinatorial space in epistasis analysis. The capacity of kernel SNPs to unify informative SNP and tag SNP is explored, and inconsistencies are minimized in further studies. 
The correlation between multiple SNPs is formalized using multi-information measures. In extending the correlation, a distance formula for measuring the similarity between clusters is first designed to conduct hierarchical clustering. Hierarchical clustering consists of both information gain and haplotype diversity, so that the proposed approach can achieve unification. The kernel SNPs are then selected from every cluster through the top rank or backward elimination scheme. Using these kernel SNPs, extensive experimental comparisons are conducted between informative SNPs on haplotype reconstruction accuracy and tag SNPs on haplotype coverage. Results indicate that the kernel SNP can practically unify informative SNP and tag SNP and is therefore adaptable to various applications.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Chan:2015:MPP, author = "Shing-Chow Chan and Li Zhang and Ho-Chun Wu and Kai-Man Tsui", title = "A maximum a posteriori probability and time-varying approach for inferring gene regulatory networks from time course gene microarray data", journal = j-TCBB, volume = "12", number = "1", pages = "123--135", month = jan, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2343951", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Unlike most conventional techniques with static model assumption, this paper aims to estimate the time-varying model parameters and identify significant genes involved at different timepoints from time course gene microarray data. 
We first formulate the parameter identification problem as a new maximum a posteriori probability estimation problem so that prior information can be incorporated as regularization terms to reduce the large estimation variance of the high dimensional estimation problem. Under this framework, sparsity and temporal consistency of the model parameters are imposed using $ L_1 $-regularization and novel continuity constraints, respectively. The resulting problem is solved using the L-BFGS method with the initial guess obtained from the partial least squares method. A novel forward validation measure is also proposed for the selection of regularization parameters, based on both forward and current prediction errors. The proposed method is evaluated using a synthetic benchmark testing data and a publicly available yeast Saccharomyces cerevisiae cell cycle microarray data. For the latter particularly, a number of significant genes identified at different timepoints are found to be biological significant according to previous findings in biological experiments. 
These suggest that the proposed approach may serve as a valuable tool for inferring time-varying gene regulatory networks in biological studies.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Fan:2015:AMD, author = "Yetian Fan and Wei Wu and Jie Yang and Wenyu Yang and Rongrong Liu", title = "An algorithm for motif discovery with iteration on lengths of motifs", journal = j-TCBB, volume = "12", number = "1", pages = "136--141", month = jan, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2351793", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Analysis of DNA sequence motifs is becoming increasingly important in the study of gene regulation, and the identification of motif in DNA sequences is a complex problem in computational biology. Motif discovery has attracted the attention of more and more researchers, and varieties of algorithms have been proposed. Most existing motif discovery algorithms fix the motif's length as one of the input parameters. In this paper, a novel method is proposed to identify the optimal length of the motif and the optimal motif with that length, through an iteration process on increasing length numbers. For each fixed length, a modified genetic algorithm (GA) is used for finding the optimal motif with that length. Three operators are used in the modified GA: Mutation that is similar to the one used in usual GA but is modified to avoid local optimum in our case, and Addition and Deletion that are proposed by us for the problem. A criterion is given for singling out the optimal length in the increasing motif's lengths. We call this method AMDILM (an algorithm for motif discovery with iteration on lengths of motifs). 
The experiments on simulated data and real biological data show that AMDILM can accurately identify the optimal motif length. Meanwhile, the optimal motifs discovered by AMDILM are consistent with the real ones and are similar with the motifs obtained by the three well-known methods: Gibbs Sampler, MEME and Weeder.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wong:2015:DBC, author = "Man-Hon Wong and Ho-Yin Sze-To and Leung-Yau Lo and Tak-Ming Chan and Kwong-Sak Leung", title = "Discovering binding cores in {Protein--DNA} binding using association rule mining with statistical measures", journal = j-TCBB, volume = "12", number = "1", pages = "142--154", month = jan, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2343952", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Understanding binding cores is of fundamental importance in deciphering Protein--DNA (TF-TFBS) binding and for the deep understanding of gene regulation. Traditionally, binding cores are identified in resolved high-resolution 3D structures. However, it is expensive, labor-intensive and time-consuming to obtain these structures. Hence, it is promising to discover binding cores computationally on a large scale. Previous studies successfully applied association rule mining to discover binding cores from TF-TFBS binding sequence data only. Despite the successful results, there are limitations such as the use of tight support and confidence thresholds, the distortion by statistical bias in counting pattern occurrences, and the lack of a unified scheme to rank TF-TFBS associated patterns. 
In this study, we proposed an association rule mining algorithm incorporating statistical measures and ranking to address these limitations. Experimental results demonstrated that, even when the threshold on support was lowered to one-tenth of the value used in previous studies, a satisfactory verification ratio was consistently observed under different confidence levels. Moreover, we proposed a novel ranking scheme for TF-TFBS associated patterns based on p-values and co-support values. By comparing with other discovery approaches, the effectiveness of our algorithm was demonstrated. Eighty-four binding cores with PDB support are uniquely identified.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Gorecki:2015:GTD, author = "Pawe{\l} G{\'o}recki and Oliver Eulenstein", title = "Gene tree diameter for deep coalescence", journal = j-TCBB, volume = "12", number = "1", pages = "155--165", month = jan, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2351795", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The deep coalescence cost accounts for discord caused by deep coalescence between a gene tree and a species tree. It is a major concern that the diameter of a gene tree (the tree's maximum deep coalescence cost across all species trees) depends on its topology, which can largely obfuscate phylogenetic studies. While this bias can be compensated by normalizing the deep coalescence cost using diameters, obtaining them efficiently has been posed as an open problem by Than and Rosenberg [33]. Here, we resolve this problem by describing a linear time algorithm to compute the diameter of a gene tree. 
In addition, we provide a complete classification of the species trees yielding this diameter to guide phylogenetic analyses.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wang:2015:HCF, author = "Chao Wang and Xi Li and Peng Chen and Aili Wang and Xuehai Zhou and Hong Yu", title = "Heterogeneous cloud framework for big data genome sequencing", journal = j-TCBB, volume = "12", number = "1", pages = "166--178", month = jan, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2351800", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The next generation genome sequencing problem with short (long) reads is an emerging field in numerous scientific and big data research domains. However, data sizes and ease of access for scientific researchers are growing and most current methodologies rely on one acceleration approach and so cannot meet the requirements imposed by explosive data scales and complexities. In this paper, we propose a novel FPGA-based acceleration solution with MapReduce framework on multiple hardware accelerators. The combination of hardware acceleration and MapReduce execution flow could greatly accelerate the task of aligning short length reads to a known reference genome. To evaluate the performance and other metrics, we conducted a theoretical speedup analysis on a MapReduce programming platform, which demonstrates that our proposed architecture have efficient potential to improve the speedup for large scale genome sequencing applications. Also, as a practical study, we have built a hardware prototype on the real Xilinx FPGA chip. 
Significant metrics on speedup, sensitivity, mapping quality, error rate, and hardware cost are evaluated, respectively. Experimental results demonstrate that the proposed platform could efficiently accelerate the next generation sequencing problem with satisfactory accuracy and acceptable hardware cost.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Peng:2015:IPC, author = "Wei Peng and Jianxin Wang and Bihai Zhao and Lusheng Wang", title = "Identification of protein complexes using weighted {PageRank--Nibble} algorithm and core-attachment structure", journal = j-TCBB, volume = "12", number = "1", pages = "179--192", month = jan, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2343954", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/pagerank.bib; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Protein complexes play a significant role in understanding the underlying mechanism of most cellular functions. Recently, many researchers have explored computational methods to identify protein complexes from protein-protein interaction (PPI) networks. One group of researchers focus on detecting local dense subgraphs which correspond to protein complexes by considering local neighbors. The drawback of this kind of approach is that the global information of the networks is ignored. Some methods such as Markov Clustering algorithm (MCL), PageRank--Nibble are proposed to find protein complexes based on random walk technique which can exploit the global structure of networks. However, these methods ignore the inherent core-attachment structure of protein complexes and treat adjacent node equally. 
In this paper, we design a weighted PageRank--Nibble algorithm which assigns each adjacent node with different probability, and propose a novel method named WPNCA to detect protein complex from PPI networks by using weighted PageRank--Nibble algorithm and core-attachment structure. Firstly, WPNCA partitions the PPI networks into multiple dense clusters by using weighted PageRank--Nibble algorithm. Then the cores of these clusters are detected and the rest of proteins in the clusters will be selected as attachments to form the final predicted protein complexes. The experiments on yeast data show that WPNCA outperforms the existing methods in terms of both accuracy and p-value. The software for WPNCA is available at ``http://netlab.csu.edu.cn/bioinfomatics/weipeng/WPNCA/download.html''.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Du:2015:IAC, author = "Nan Du and Marc R. Knecht and Mark T. Swihart and Zhenghua Tang and Tiffany R. Walsh and Aidong Zhang", title = "Identifying affinity classes of inorganic materials binding sequences via a graph-based model", journal = j-TCBB, volume = "12", number = "1", pages = "193--204", month = jan, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2321158", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Rapid advances in bionanotechnology have recently generated growing interest in identifying peptides that bind to inorganic materials and classifying them based on their inorganic material affinities. However, there are some distinct characteristics of inorganic materials binding sequence data that limit the performance of many widely-used classification methods when applied to this problem. 
In this paper, we propose a novel framework to predict the affinity classes of peptide sequences with respect to an associated inorganic material. We first generate a large set of simulated peptide sequences based on an amino acid transition matrix tailored for the specific inorganic material. Then the probability of test sequences belonging to a specific affinity class is calculated by minimizing an objective function. In addition, the objective function is minimized through iterative propagation of probability estimates among sequences and sequence clusters. Results of computational experiments on two real inorganic material binding sequence data sets show that the proposed framework is highly effective for identifying the affinity classes of inorganic material binding sequences. Moreover, the experiments on the structural classification of proteins (SCOP) data set shows that the proposed framework is general and can be applied to traditional protein sequences.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhu:2015:PIM, author = "Xiangyuan Zhu and Kenli Li and Ahmad Salah and Lin Shi and Keqin Li", title = "Parallel implementation of {MAFFT} on {CUDA}-enabled graphics hardware", journal = j-TCBB, volume = "12", number = "1", pages = "205--218", month = jan, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2351801", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Multiple sequence alignment (MSA) constitutes an extremely powerful tool for many biological applications including phylogenetic tree estimation, secondary structure prediction, and critical residue identification. 
However, aligning large biological sequences with popular tools such as MAFFT requires long runtimes on sequential architectures. Due to the ever increasing sizes of sequence databases, there is increasing demand to accelerate this task. In this paper, we demonstrate how graphic processing units (GPUs), powered by the compute unified device architecture (CUDA), can be used as an efficient computational platform to accelerate the MAFFT algorithm. To fully exploit the GPU's capabilities for accelerating MAFFT, we have optimized the sequence data organization to eliminate the bandwidth bottleneck of memory access, designed a memory allocation and reuse strategy to make full use of limited memory of GPUs, proposed a new modified-run-length encoding (MRLE) scheme to reduce memory consumption, and used high-performance shared memory to speed up I/O operations. Our implementation tested in three NVIDIA GPUs achieves speedup up to 11.28 on a Tesla K20m GPU compared to the sequential MAFFT 7.015.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Yu:2015:PPF, author = "Guoxian Yu and Huzefa Rangwala and Carlotta Domeniconi and Guoji Zhang and Zili Zhang", title = "Predicting protein function using multiple kernels", journal = j-TCBB, volume = "12", number = "1", pages = "219--233", month = jan, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2351821", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "High-throughput experimental techniques provide a wide variety of heterogeneous proteomic data sources. 
To exploit the information spread across multiple sources for protein function prediction, these data sources are transformed into kernels and then integrated into a composite kernel. Several methods first optimize the weights on these kernels to produce a composite kernel, and then train a classifier on the composite kernel. As such, these approaches result in an optimal composite kernel, but not necessarily in an optimal classifier. On the other hand, some approaches optimize the loss of binary classifiers and learn weights for the different kernels iteratively. For multi-class or multi-label data, these methods have to solve the problem of optimizing weights on these kernels for each of the labels, which are computationally expensive and ignore the correlation among labels. In this paper, we propose a method called Predicting Protein Function using Multiple Kernels (ProMK). ProMK iteratively optimizes the phases of learning optimal weights and reduces the empirical loss of multi-label classifier for each of the labels simultaneously. ProMK can integrate kernels selectively and downgrade the weights on noisy kernels. We investigate the performance of ProMK on several publicly available protein function prediction benchmarks and synthetic datasets. We show that the proposed approach performs better than previously proposed protein function prediction approaches that integrate multiple data sources and multi-label multiple kernel learning methods. 
The codes of our proposed method are available at https://sites.google.com/site/guoxian85/promk.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Keijsper:2015:TCB, author = "Judith Keijsper and Tim Oosterwijk", title = "Tractable cases of $ (*, 2)$-bounded parsimony haplotyping", journal = j-TCBB, volume = "12", number = "1", pages = "234--247", month = jan, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2352031", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Parsimony haplotyping is the problem of finding a set of haplotypes of minimum cardinality that explains a given set of genotypes, where a genotype is explained by two haplotypes if it can be obtained as a combination of the two. This problem is NP-complete in the general case, but polynomially solvable for ( k, l )-bounded instances for certain k and l. Here, k denotes the maximum number of ambiguous sites in any genotype, and l is the maximum number of genotypes that are ambiguous at the same site. Only the complexity of the (*, 2)-bounded problem is still unknown, where * denotes no restriction. It has been proved that (*, 2)-bounded instances have compatibility graphs that can be constructed from cliques and circuits by pasting along an edge. In this paper, we give a constructive proof of the fact that (*, 2)-bounded instances are polynomially solvable if the compatibility graph is constructed by pasting cliques, trees and circuits along a bounded number of edges. 
We obtain this proof by solving a slightly generalized problem on circuits, trees and cliques respectively, and arguing that all possible combinations of optimal solutions for these graphs that are pasted along a bounded number of edges can be enumerated efficiently.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Luo:2015:GES, author = "Feng Luo and Xintao Wu", title = "Guest editorial for special section on {BIBM 2013}", journal = j-TCBB, volume = "12", number = "2", pages = "252--253", month = mar, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2410132", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Jiang:2015:PMI, author = "Xingpeng Jiang and Xiaohua Hu and Weiwei Xu and E. K. Park", title = "Predicting microbial interactions using vector autoregressive model with graph regularization", journal = j-TCBB, volume = "12", number = "2", pages = "254--261", month = mar, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2338298", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Microbial interactions play important roles on the structure and function of complex microbial communities. With the rapid accumulation of high-throughput metagenomic or 16S rRNA sequencing data, it is possible to infer complex microbial interactions. 
Co-occurrence patterns of microbial species among multiple samples are often utilized to infer interactions. There are few methods to consider the temporally interacting patterns among microbial species. In this paper, we present a Graph-regularized Vector Autoregressive (GVAR) model to infer causal relationships among microbial entities. The new model has advantage comparing to the original vector autoregressive (VAR) model. Specifically, GVAR can incorporate similarity information for microbial interaction inference --- i.e., GVAR assumed that if two species are similar in the previous stage, they tend to have similar influence on the other species in the next stage. We apply the model on a time series dataset of human gut microbiome which was treated with repeated antibiotics. The experimental results indicate that the new approach has better performance than several other VAR-based models and demonstrate its capability of extracting relevant microbial interactions.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wan:2015:PPL, author = "Cen Wan and Alex A. Freitas and Jo{\~a}o Pedro {De Magalh{\~a}es}", title = "Predicting the pro-longevity or anti-longevity effect of model organism genes with new hierarchical feature selection methods", journal = j-TCBB, volume = "12", number = "2", pages = "262--275", month = mar, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2355218", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Ageing is a highly complex biological process that is still poorly understood. 
With the growing amount of ageing-related data available on the web, in particular concerning the genetics of ageing, it is timely to apply data mining methods to that data, in order to try to discover novel patterns that may assist ageing research. In this work, we introduce new hierarchical feature selection methods for the classification task of data mining and apply them to ageing-related data from four model organisms: Caenorhabditis elegans (worm), Saccharomyces cerevisiae (yeast), Drosophila melanogaster (fly), and Mus musculus (mouse). The main novel aspect of the proposed feature selection methods is that they exploit hierarchical relationships in the set of features (Gene Ontology terms) in order to improve the predictive accuracy of the Na{\"\i}ve Bayes and 1-Nearest Neighbour (1-NN) classifiers, which are used to classify model organisms' genes into pro-longevity or anti-longevity genes. The results show that our hierarchical feature selection methods, when used together with Na{\"\i}ve Bayes and 1-NN classifiers, obtain higher predictive accuracy than the standard (without feature selection) Na{\"\i}ve Bayes and 1-NN classifiers, respectively. 
We also discuss the biological relevance of a number of Gene Ontology terms very frequently selected by our algorithms in our datasets.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Peng:2015:UAI, author = "Wei Peng and Jianxin Wang and Yingjiao Cheng and Yu Lu and Fangxiang Wu and Yi Pan", title = "{UDoNC}: an algorithm for identifying essential proteins based on protein domains and protein--protein interaction networks", journal = j-TCBB, volume = "12", number = "2", pages = "276--288", month = mar, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2338317", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Prediction of essential proteins which are crucial to an organism's survival is important for disease analysis and drug design, as well as the understanding of cellular life. The majority of prediction methods infer the possibility of proteins to be essential by using the network topology. However, these methods are limited to the completeness of available protein--protein interaction (PPI) data and depend on the network accuracy. To overcome these limitations, some computational methods have been proposed. However, seldom of them solve this problem by taking consideration of protein domains. In this work, we first analyze the correlation between the essentiality of proteins and their domain features based on data of 13 species. We find that the proteins containing more protein domain types which rarely occur in other proteins tend to be essential. Accordingly, we propose a new prediction method, named UDoNC, by combining the domain features of proteins with their topological properties in PPI network. 
In UDoNC, the essentiality of proteins is decided by the number and the frequency of their protein domain types, as well as the essentiality of their adjacent edges measured by edge clustering coefficient. The experimental results on S. cerevisiae data show that UDoNC outperforms other existing methods in terms of area under the curve (AUC). Additionally, UDoNC can also perform well in predicting essential proteins on data of E. coli.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Mei:2015:ESN, author = "Yongguo Mei and Adria Carbo and Stefan Hoops and Raquel Hontecillas and Josep Bassaganya-Riera", title = "{ENISI SDE}: a new web-based tool for modeling stochastic processes", journal = j-TCBB, volume = "12", number = "2", pages = "289--297", month = mar, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2351823", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Modeling and simulations approaches have been widely used in computational biology, mathematics, bioinformatics and engineering to represent complex existing knowledge and to effectively generate novel hypotheses. While deterministic modeling strategies are widely used in computational biology, stochastic modeling techniques are not as popular due to a lack of user-friendly tools. This paper presents ENISI SDE, a novel web-based modeling tool with stochastic differential equations. ENISI SDE provides user-friendly web user interfaces to facilitate adoption by immunologists and computational biologists. 
This work provides three major contributions: (1) discussion of SDE as a generic approach for stochastic modeling in computational biology; (2) development of ENISI SDE, a web-based user-friendly SDE modeling tool that highly resembles regular ODE-based modeling; (3) applying ENISI SDE modeling tool through a use case for studying stochastic sources of cell heterogeneity in the context of CD4+ T cell differentiation. The CD4+ T cell differential ODE model has been published [8] and can be downloaded from biomodels.net. The case study reproduces a biological phenomenon that is not captured by the previously published ODE model and shows the effectiveness of SDE as a stochastic modeling approach in biology in general and immunology in particular and the power of ENISI SDE.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Pang:2015:IAS, author = "Bin Pang and David Schlessman and Xingyan Kuang and Nan Zhao and Daniel Shyu and Dmitry Korkin and Chi-Ren Shyu", title = "An integrated approach to sequence-independent local alignment of protein binding sites", journal = j-TCBB, volume = "12", number = "2", pages = "298--308", month = mar, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2355208", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Accurate alignment of protein--protein binding sites can aid in protein docking studies and constructing templates for predicting structure of protein complexes, along with in-depth understanding of evolutionary and functional relationships. However, over the past three decades, structural alignment algorithms have focused predominantly on global alignments with little effort on the alignment of local interfaces. 
In this paper, we introduce the PBSalign (Protein--protein Binding Site alignment) method, which integrates techniques in graph theory, 3D localized shape analysis, geometric scoring, and utilization of physicochemical and geometrical properties. Computational results demonstrate that PBSalign is capable of identifying similar homologous and analogous binding sites accurately and performing alignments with better geometric match measures than existing protein--protein interface comparison tools. The proportion of better alignment quality generated by PBSalign is 46, 56, and 70 percent more than iAlign as judged by the average match index (MI), similarity index (SI), and structural alignment score (SAS), respectively. PBSalign provides the life science community an efficient and accurate solution to binding-site alignment while striking the balance between topological details and computational complexity.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Cho:2015:PFR, author = "Young-Rae Cho and Yanan Xin and Greg Speegle", title = "{P-Finder}: reconstruction of signaling networks from protein--protein interactions and {GO} annotations", journal = j-TCBB, volume = "12", number = "2", pages = "309--321", month = mar, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2355216", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Because most complex genetic diseases are caused by defects of cell signaling, illuminating a signaling cascade is essential for understanding their mechanisms. 
We present three novel computational algorithms to reconstruct signaling networks between a starting protein and an ending protein using genome-wide protein--protein interaction (PPI) networks and gene ontology (GO) annotation data. A signaling network is represented as a directed acyclic graph in a merged form of multiple linear pathways. An advanced semantic similarity metric is applied for weighting PPIs as the preprocessing of all three methods. The first algorithm repeatedly extends the list of nodes based on path frequency towards an ending protein. The second algorithm repeatedly appends edges based on the occurrence of network motifs which indicate the link patterns more frequently appearing in a PPI network than in a random graph. The last algorithm uses the information propagation technique which iteratively updates edge orientations based on the path strength and merges the selected directed edges. Our experimental results demonstrate that the proposed algorithms achieve higher accuracy than previous methods when they are tested on well-studied pathways of S. cerevisiae. 
Furthermore, we introduce an interactive web application tool, called P-Finder, to visualize reconstructed signaling networks.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Jeong:2015:NSF, author = "Jong Cheol Jeong and Xuewen Chen", title = "A new semantic functional similarity over gene ontology", journal = j-TCBB, volume = "12", number = "2", pages = "322--334", month = mar, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2343963", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Identifying functionally similar or closely related genes and gene products has significant impacts on biological and clinical studies as well as drug discovery. In this paper, we propose an effective and practically useful method measuring both gene and gene product similarity by integrating the topology of gene ontology, known functional domains and their functional annotations. The proposed method is comprehensively evaluated through statistical analysis of the similarities derived from sequence, structure and phylogenetic profiles, and clustering analysis of disease genes clusters. Our results show that the proposed method clearly outperforms other conventional methods. Furthermore, literature analysis also reveals that the proposed method is both statistically and biologically promising for identifying functionally similar genes or gene products. 
In particular, we demonstrate that the proposed functional similarity metric is capable of discovering new disease-related genes or gene products.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ashtawy:2015:CAP, author = "Hossam M. Ashtawy and Nihar R. Mahapatra", title = "A comparative assessment of predictive accuracies of conventional and machine learning scoring functions for protein--ligand binding affinity prediction", journal = j-TCBB, volume = "12", number = "2", pages = "335--347", month = mar, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2351824", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Accurately predicting the binding affinities of large diverse sets of protein--ligand complexes efficiently is a key challenge in computational biomolecular science, with applications in drug discovery, chemical biology, and structural biology. Since a scoring function (SF) is used to score, rank, and identify potential drug leads, the fidelity with which it predicts the affinity of a ligand candidate for a protein's binding site has a significant bearing on the accuracy of virtual screening. Despite intense efforts in developing conventional SFs, which are either force-field based, knowledge-based, or empirical, their limited predictive accuracy has been a major roadblock toward cost-effective drug discovery. Therefore, in this work, we explore a range of novel SFs employing different machine-learning (ML) approaches in conjunction with a variety of physicochemical and geometrical features characterizing protein--ligand complexes. 
We assess the scoring accuracies of these new ML SFs as well as those of conventional SFs in the context of the 2007 and 2010 PDBbind benchmark datasets on both diverse and protein-family-specific test sets. We also investigate the influence of the size of the training dataset and the type and number of features used on scoring accuracy. We find that the best performing ML SF has a Pearson correlation coefficient of 0.806 between predicted and measured binding affinities compared to 0.644 achieved by a state-of-the-art conventional SF. We also find that ML SFs benefit more than their conventional counterparts from increases in the number of features and the size of training dataset. In addition, they perform better on novel proteins that they were never trained on before.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Yang:2015:FDW, author = "Lina Yang and Yuan Yan Tang and Yang Lu and Huiwu Luo", title = "A fractal dimension and wavelet transform based method for protein sequence similarity analysis", journal = j-TCBB, volume = "12", number = "2", pages = "348--359", month = mar, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2363480", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "One of the key tasks related to proteins is the similarity comparison of protein sequences in the area of bioinformatics and molecular biology, which helps the prediction and classification of protein structure and function. It is a significant and open issue to find similar proteins from a large scale of protein database efficiently. 
This paper presents a new distance based protein similarity analysis using a new encoding method of protein sequence which is based on fractal dimension. The protein sequences are first represented into the 1-dimensional feature vectors by their biochemical quantities. A series of Hybrid method involving discrete Wavelet transform, Fractal dimension calculation (HWF) with sliding window are then applied to form the feature vector. At last, through the similarity calculation, we can obtain the distance matrix, by which, the phylogenic tree can be constructed. We apply this approach by analyzing the ND5 (NADH dehydrogenase subunit 5) protein cluster data set. The experimental results show that the proposed model is more accurate than the existing ones such as Su's model, Zhang's model, Yao's model and MEGA software, and it is consistent with some known biological facts.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Konur:2015:PDM, author = "Savas Konur and Marian Gheorghe", title = "A property-driven methodology for formal analysis of synthetic biology systems", journal = j-TCBB, volume = "12", number = "2", pages = "360--371", month = mar, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2362531", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This paper proposes a formal methodology to analyse bio-systems, in particular synthetic biology systems. An integrative analysis perspective combining different model checking approaches based on different property categories is provided. 
The methodology is applied to the synthetic pulse generator system and several verification experiments are carried out to demonstrate the use of our approach to formally analyse various aspects of synthetic biology systems.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Li:2015:TPB, author = "Min Li and Yu Lu and Jianxin Wang and Fang-Xiang Wu and Yi Pan", title = "A topology potential-based method for identifying essential proteins from {PPI} networks", journal = j-TCBB, volume = "12", number = "2", pages = "372--383", month = mar, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2361350", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Essential proteins are indispensable for cellular life. It is of great significance to identify essential proteins that can help us understand the minimal requirements for cellular life and is also very important for drug design. However, identification of essential proteins based on experimental approaches are typically time-consuming and expensive. With the development of high-throughput technology in the post-genomic era, more and more protein--protein interaction data can be obtained, which make it possible to study essential proteins from the network level. There have been a series of computational approaches proposed for predicting essential proteins based on network topologies. Most of these topology based essential protein discovery methods were to use network centralities. In this paper, we investigate the essential proteins' topological characters from a completely new perspective. 
To our knowledge it is the first time that topology potential is used to identify essential proteins from a protein--protein interaction (PPI) network. The basic idea is that each protein in the network can be viewed as a material particle which creates a potential field around itself and the interaction of all proteins forms a topological field over the network. By defining and computing the value of each protein's topology potential, we can obtain a more precise ranking which reflects the importance of proteins from the PPI network. The experimental results show that topology potential-based methods TP and TP-NC outperform traditional topology measures: degree centrality (DC), betweenness centrality (BC), closeness centrality (CC), subgraph centrality (SC), eigenvector centrality (EC), information centrality (IC), and network centrality (NC) for predicting essential proteins. In addition, these centrality measures are improved on their performance for identifying essential proteins in biological network when controlled by topology potential.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Yu:2015:EEA, author = "Qiang Yu and Hongwei Huo and Jeffrey Scott Vitter and Jun Huan and Yakov Nekrich", title = "An efficient exact algorithm for the motif stem search problem over large alphabets", journal = j-TCBB, volume = "12", number = "2", pages = "384--397", month = mar, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2361668", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In recent years, there has been an increasing interest in planted $ (l, d) $ motif search (PMS) with applications to discovering significant segments in biological sequences. 
However, there has been little discussion about PMS over large alphabets. This paper focuses on motif stem search (MSS), which is recently introduced to search motifs on large-alphabet inputs. A motif stem is an $l$-length string with some wildcards. The goal of the MSS problem is to find a set of stems that represents a superset of all $ (l, d) $ motifs present in the input sequences, and the superset is expected to be as small as possible. The three main contributions of this paper are as follows: (1) We build motif stem representation more precisely by using regular expressions. (2) We give a method for generating all possible motif stems without redundant wildcards. (3) We propose an efficient exact algorithm, called StemFinder, for solving the MSS problem. Compared with the previous MSS algorithms, StemFinder runs much faster and reports fewer stems which represent a smaller superset of all $ (l, d) $ motifs. StemFinder is freely available at http://sites.google.com/site/feqond/stemfinder.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhang:2015:III, author = "Xian Zhang and Ligang Wu and Shaochun Cui", title = "An improved integral inequality to stability analysis of genetic regulatory networks with interval time-varying delays", journal = j-TCBB, volume = "12", number = "2", pages = "398--409", month = mar, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2351815", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This paper focuses on stability analysis for a class of genetic regulatory networks with interval time-varying delays. An improved integral inequality concerning on double-integral items is first established. 
Then, we use the improved integral inequality to deal with the resultant double-integral items in the derivative of the involved Lyapunov-Krasovskii functional. As a result, a delay-range-dependent and delay-rate-dependent asymptotical stability criterion is established for genetic regulatory networks with differential time-varying delays. Furthermore, it is theoretically proven that the stability criterion proposed here is less conservative than the corresponding one in [Neurocomputing, 2012, 93: 19-26]. Based on the obtained result, another stability criterion is given under the case that the information of the derivatives of delays is unknown. Finally, the effectiveness of the approach proposed in this paper is illustrated by a pair of numerical examples which give the comparisons of stability criteria proposed in this paper and some literature.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Li:2015:BLC, author = "Zhenhua Li and Ying He and Limsoon Wong and Jinyan Li", title = "Burial level change defines a high energetic relevance for protein binding interfaces", journal = j-TCBB, volume = "12", number = "2", pages = "410--421", month = mar, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2361355", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Protein--protein interfaces defined through atomic contact or solvent accessibility change are widely adopted in structural biology studies. But, these definitions cannot precisely capture energetically important regions at protein interfaces. The burial depth of an atom in a protein is related to the atom's energy. 
This work investigates how closely the change in burial level of an atom/residue upon complexation is related to the binding. Burial level change is different from burial level itself. An atom deeply buried in a monomer with a high burial level may not change its burial level after an interaction and it may have little burial level change. We hypothesize that an interface is a region of residues all undergoing burial level changes after interaction. By this definition, an interface can be decomposed into an onion-like structure according to the burial level change extent. We found that our defined interfaces cover energetically important residues more precisely, and that the binding free energy of an interface is distributed progressively from the outermost layer to the core. These observations are used to predict binding hot spots. Our approach's F-measure performance on a benchmark dataset of alanine mutagenesis residues is much superior or similar to those by complicated energy modeling or machine learning approaches.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Dasarathy:2015:DRP, author = "Gautam Dasarathy and Robert Nowak and Sebastien Roch", title = "Data requirement for phylogenetic inference from multiple loci: a new distance method", journal = j-TCBB, volume = "12", number = "2", pages = "422--432", month = mar, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2361685", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We consider the problem of estimating the evolutionary history of a set of species (phylogeny or species tree) from several genes. 
It is known that the evolutionary history of individual genes (gene trees) might be topologically distinct from each other and from the underlying species tree, possibly confounding phylogenetic analysis. A further complication in practice is that one has to estimate gene trees from molecular sequences of finite length. We provide the first full data-requirement analysis of a species tree reconstruction method that takes into account estimation errors at the gene level. Under that criterion, we also devise a novel reconstruction algorithm that provably improves over all previous methods in a regime of interest.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Meng:2015:GSI, author = "Jun Meng and Jing Zhang and Yushi Luan", title = "Gene selection integrated with biological knowledge for plant stress response using neighborhood system and rough set theory", journal = j-TCBB, volume = "12", number = "2", pages = "433--444", month = mar, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2361329", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Mining knowledge from gene expression data is a hot research topic and direction of bioinformatics. Gene selection and sample classification are significant research trends, due to the large amount of genes and small size of samples in gene expression data. Rough set theory has been successfully applied to gene selection, as it can select attributes without redundancy. To improve the interpretability of the selected genes, some researchers introduced biological knowledge. 
In this paper, we first employ neighborhood system to deal directly with the new information table formed by integrating gene expression data with biological knowledge, which can simultaneously present the information in multiple perspectives and do not weaken the information of individual gene for selection and classification. Then, we give a novel framework for gene selection and propose a significant gene selection method based on this framework by employing reduction algorithm in rough set theory. The proposed method is applied to the analysis of plant stress response. Experimental results on three data sets show that the proposed method is effective, as it can select significant gene subsets without redundancy and achieve high classification accuracy. Biological analysis for the results shows that the interpretability is well.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Cickovski:2015:GPI, author = "Trevor Cickovski and Tiffany Flor and Galen Irving-Sachs and Philip Novikov and James Parda and Giri Narasimhan", title = "{GPUDePiCt}: a parallel implementation of a clustering algorithm for computing degenerate primers on graphics processing units", journal = j-TCBB, volume = "12", number = "2", pages = "445--454", month = mar, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2355231", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In order to make multiple copies of a target sequence in the laboratory, the technique of Polymerase Chain Reaction (PCR) requires the design of ``primers'', which are short fragments of nucleotides complementary to the flanking regions of the target sequence. 
If the same primer is to amplify multiple closely related target sequences, then it is necessary to make the primers ``degenerate'', which would allow it to hybridize to target sequences with a limited amount of variability that may have been caused by mutations. However, the PCR technique can only allow a limited amount of degeneracy, and therefore the design of degenerate primers requires the identification of reasonably well-conserved regions in the input sequences. We take an existing algorithm for designing degenerate primers that is based on clustering and parallelize it in a web-accessible software package GPUDePiCt, using a shared memory model and the computing power of Graphics Processing Units (GPUs). We test our implementation on large sets of aligned sequences from the human genome and show a multi-fold speedup for clustering using our hybrid GPU/CPU implementation over a pure CPU approach for these sequences, which consist of more than 7,500 nucleotides. We also demonstrate that this speedup is consistent over larger numbers and longer lengths of aligned sequences.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Cai:2015:IPC, author = "Bingjing Cai and Haiying Wang and Huiru Zheng and Hui Wang", title = "Identification of protein complexes from tandem affinity purification\slash mass spectrometry data via biased random walk", journal = j-TCBB, volume = "12", number = "2", pages = "455--466", month = mar, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2352616", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Systematic identification of protein complexes from protein--protein interaction networks (PPIs) is an important application 
of data mining in life science. Over the past decades, various new clustering techniques have been developed based on modelling PPIs as binary relations. Non-binary information of co-complex relations (prey/bait) in PPIs data derived from tandem affinity purification/mass spectrometry (TAP-MS) experiments has been unfairly disregarded. In this paper, we propose a Biased Random Walk based algorithm for detecting protein complexes from TAP-MS data, resulting in the random walk with restarting baits (RWRB). RWRB is developed based on Random walk with restart. The main contribution of RWRB is the incorporation of co-complex relations in TAP-MS PPI networks into the clustering process, by implementing a new restarting strategy during the process of random walk. Through experimentation on un-weighted and weighted TAP-MS data sets, we validated biological significance of our results by mapping them to manually curated complexes. Results showed that, by incorporating non-binary, co-membership information, significant improvement has been achieved in terms of both statistical measurements and biological relevance. 
Better accuracy demonstrates that the proposed method outperformed several state-of-the-art clustering algorithms for the detection of protein complexes in TAP-MS data.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Liu:2015:IDN, author = "Xueming Liu and Linqiang Pan", title = "Identifying driver nodes in the human signaling network using structural controllability analysis", journal = j-TCBB, volume = "12", number = "2", pages = "467--472", month = mar, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2360396", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Cell signaling governs the basic cellular activities and coordinates the actions in cell. Abnormal regulations in cell signaling processing are responsible for many human diseases, such as diabetes and cancers. With the accumulation of massive data related to human cell signaling, it is feasible to obtain a human signaling network. Some studies have shown that interesting biological phenomenon and drug-targets could be discovered by applying structural controllability analysis to biological networks. In this work, we apply structural controllability to a human signaling network and detect driver nodes, providing a systematic analysis of the role of different proteins in controlling the human signaling network. We find that the proteins in the upstream of the signaling information flow and the low in-degree proteins play a crucial role in controlling the human signaling network. 
Interestingly, inputting different control signals on the regulators of the cancer-associated genes could cost less than controlling the cancer-associated genes directly in order to control the whole human signaling network in the sense that less drive nodes are needed. This research provides a fresh perspective for controlling the human cell signaling system.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Jamil:2015:IIE, author = "Hasan M. Jamil", title = "Improving integration effectiveness of {ID} mapping based biological record linkage", journal = j-TCBB, volume = "12", number = "2", pages = "473--486", month = mar, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2355213", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Traditionally, biological objects such as genes, proteins, and pathways are represented by a convenient identifier, or ID, which is then used to cross reference, link and describe objects in biological databases. Relationships among the objects are often established using non-trivial and computationally complex ID mapping systems or converters, and are stored in authoritative databases such as UniGene, GeneCards, PIR and BioMart. Despite best efforts, such mappings are largely incomplete and riddled with false negatives. Consequently, data integration using record linkage that relies on these mappings produces poor quality of data, inadvertently leading to erroneous conclusions. 
In this paper, we discuss this largely ignored dimension of data integration, examine how the ubiquitous use of identifiers in biological databases is a significant barrier to knowledge fusion using distributed computational pipelines, and propose two algorithms for ad hoc and restriction free ID mapping of arbitrary types using online resources. We also propose two declarative statements for ID conversion and data integration based on ID mapping on-the-fly.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Priyadarshana:2015:MBP, author = "W. J. R. M. Priyadarshana and Georgy Sofronov", title = "Multiple break-points detection in array {CGH} data via the cross-entropy method", journal = j-TCBB, volume = "12", number = "2", pages = "487--498", month = mar, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2361639", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Array comparative genome hybridization (aCGH) is a widely used methodology to detect copy number variations of a genome in high resolution. Knowing the number of break-points and their corresponding locations in genomic sequences serves different biological needs. Primarily, it helps to identify disease-causing genes that have functional importance in characterizing genome wide diseases. For human autosomes the normal copy number is two, whereas at the sites of oncogenes it increases (gain of DNA) and at the tumour suppressor genes it decreases (loss of DNA). The majority of the current detection methods are deterministic in their set-up and use dynamic programming or different smoothing techniques to obtain the estimates of copy number variations. 
These approaches limit the search space of the problem due to different assumptions considered in the methods and do not represent the true nature of the uncertainty associated with the unknown break-points in genomic sequences. We propose the Cross-Entropy method, which is a model-based stochastic optimization technique as an exact search method, to estimate both the number and locations of the break-points in aCGH data. We model the continuous scale log-ratio data obtained by the aCGH technique as a multiple breakpoint problem. The proposed methodology is compared with well established publicly available methods using both artificially generated data and real data. Results show that the proposed procedure is an effective way of estimating number and especially the locations of break-points with high level of precision. Availability: The methods described in this article are implemented in the new R package breakpoint and it is available from the Comprehensive R Archive Network at http://CRAN.R-project.org/package=breakpoint.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Setubal:2015:TSS, author = "Jo{\~a}o C. Setubal and Nalvo Almeida", title = "{TCBB} special section on the {Brazilian Symposium on Bioinformatics 2013}", journal = j-TCBB, volume = "12", number = "3", pages = "499--499", month = may, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2410352", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Braga:2015:SLG, author = "Mar{\'\i}lia D. V. 
Braga and Jens Stoye", title = "Sorting linear genomes with rearrangements and indels", journal = j-TCBB, volume = "12", number = "3", pages = "500--506", month = may, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2329297", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Rearrangements are mutations that can change the organization of a genome, but not its content. Examples are inversions of DNA segments, translocations of chromosome ends, fusions and fissions of chromosomes. All mentioned rearrangements can be represented by the generic Double Cut and Join (DCJ) operation. However, the DCJ operation also allows circular chromosomes to be created at intermediate steps, even if the compared genomes are linear. In this case it is more plausible to consider a restriction in which the reincorporation of a circular chromosome has to be done immediately after its creation. We call these two consecutive operations an ER composition. It has been shown that an ER composition mimics either an internal block interchange (when two segments in the same chromosome exchange their positions), or an internal transposition (the special case of a block interchange when the two segments are adjacent). The DCJ distance of two genomes is the same, regardless of this restriction, and can be computed in linear time. For comparing two genomes with unequal contents, in addition to rearrangements we have to allow insertions and deletions of DNA segments--named indels. It is already known that the distance in the model combining DCJ and indel operations can be exactly computed. Again, for linear genomes it would be more plausible to adopt a restricted version with ER compositions. This model was studied recently by da Silva et al. (BMC Bioinformatics 13, Suppl. 
19, S14, 2012), but only an upper bound for the restricted DCJ-indel distance was provided. Here we first solve an open problem posed in that paper and present a very simple proof showing that the distance, which can be computed in linear time, is the same for both the unrestricted and the restricted DCJ-indel models. We then give a simpler algorithm for computing an optimal restricted DCJ-indel sorting scenario in O(n log n) time. We also relate the DCJ-indel distance to the restricted DCJ-substitution distance, which instead of indels considers a more powerful operation that allows the substitution of a DNA segment by another DNA segment. We show that the DCJ-indel distance is a 2-approximation for the restricted DCJ-substitution distance.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Siederdissen:2015:PGA, author = "Christian H{\"o}ner zu Siederdissen and Ivo L. Hofacker and Peter F. Stadler", title = "Product grammars for alignment and folding", journal = j-TCBB, volume = "12", number = "3", pages = "507--519", month = may, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2326155", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We develop a theory of algebraic operations over linear and context-free grammars that makes it possible to combine simple ``atomic'' grammars operating on single sequences into complex, multi-dimensional grammars. We demonstrate the utility of this framework by constructing the search spaces of complex alignment problems on multiple input sequences explicitly as algebraic expressions of very simple one-dimensional grammars.
In particular, we provide a fully worked frameshift-aware, semiglobal DNA-protein alignment algorithm whose grammar is composed of products of small, atomic grammars. The compiler accompanying our theory makes it easy to experiment with the combination of multiple grammars and different operations. Composite grammars can be written out in {\LaTeX} for documentation and as a guide to implementation of dynamic programming algorithms. An embedding in Haskell as a domain-specific language makes the theory directly accessible to writing and using grammar products without the detour of an external compiler. Software and supplemental files available here: http://www.bioinf.uni-leipzig.de/Software/gramprod/", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Hoksza:2015:MRS, author = "David Hoksza and Daniel Svozil", title = "Multiple {$3$D} {RNA} structure superposition using neighbor joining", journal = j-TCBB, volume = "12", number = "3", pages = "520--530", month = may, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2351810", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Recent advances in RNA research and the steady growth of available RNA structures call for bioinformatics methods for handling and analyzing RNA structural data. Recently, we introduced SETTER--a fast and accurate method for RNA pairwise structure alignment. In this paper, we describe MultiSETTER, SETTER extension for multiple RNA structure alignment. MultiSETTER combines SETTER's decomposition of RNA structures into non-overlapping structural subunits with the multiple sequence alignment algorithm ClustalW adapted for the structure alignment.
The accuracy of MultiSETTER was assessed by the automatic classification of RNA structures and its comparison to SCOR annotations. In addition, MultiSETTER classification was also compared to multiple sequence alignment-based and secondary structure alignment-based classifications provided by LocARNA and RNADistance tools, respectively. MultiSETTER precompiled Windows libraries, as well as the C++ source code, are freely available from http://siret.cz/multisetter.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Carroll:2015:IRE, author = "Hyrum D. Carroll and Alex C. Williams and Anthony G. Davis and John L. Spouge", title = "Improving retrieval efficacy of homology searches using the false discovery rate", journal = j-TCBB, volume = "12", number = "3", pages = "531--537", month = may, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2366112", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Over the past few decades, discovery based on sequence homology has become a widely accepted practice. Consequently, comparative accuracy of retrieval algorithms (e.g., BLAST) has been rigorously studied for improvement. Unlike most components of retrieval algorithms, the E-value threshold criterion has yet to be thoroughly investigated. An investigation of the threshold is important as it exclusively dictates which sequences are declared relevant and irrelevant. In this paper, we introduce the false discovery rate (FDR) statistic as a replacement for the uniform threshold criterion in order to improve efficacy in retrieval systems. 
Using NCBI's BLAST and PSI-BLAST software packages, we demonstrate the applicability of such a replacement in both non-iterative (BLASTFDR) and iterative (PSI-BLASTFDR) homology searches. For each application, we performed an evaluation of retrieval efficacy with five different multiple testing methods on a large training database. For each algorithm, we choose the best performing method, Benjamini-Hochberg, as the default statistic. As measured by the threshold average precision, BLASTFDR yielded 14.1 percent better retrieval performance than BLAST on a large (5,161 queries) test database and PSI-BLASTFDR attained 11.8 percent better retrieval performance than PSI-BLAST. The C++ source code specific to BLASTFDR and PSI-BLASTFDR and instructions are available at http://www.cs.mtsu.edu/~hcarroll/blast_fdr/.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Birlutiu:2015:BFC, author = "Adriana Birlutiu and Florence D'Alch{\'e}-Buc and Tom Heskes", title = "A {Bayesian} framework for combining protein and network topology information for predicting protein--protein interactions", journal = j-TCBB, volume = "12", number = "3", pages = "538--550", month = may, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2359441", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Computational methods for predicting protein--protein interactions are important tools that can complement high-throughput technologies and guide biologists in designing new laboratory experiments. The proteins and the interactions between them can be described by a network which is characterized by several topological properties.
Information about proteins and interactions between them, in combination with knowledge about topological properties of the network, can be used for developing computational methods that can accurately predict unknown protein--protein interactions. This paper presents a supervised learning framework based on Bayesian inference for combining two types of information: (i) network topology information, and (ii) information related to proteins and the interactions between them. The motivation of our model is that by combining these two types of information one can achieve a better accuracy in predicting protein--protein interactions, than by using models constructed from these two types of information independently.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Song:2015:MLA, author = "Yinglei Song and Chunmei Liu and Zhi Wang", title = "A machine learning approach for accurate annotation of noncoding {RNAs}", journal = j-TCBB, volume = "12", number = "3", pages = "551--559", month = may, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2366758", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Searching genomes to locate noncoding RNA genes with known secondary structure is an important problem in bioinformatics. In general, the secondary structure of a searched noncoding RNA is defined with a structure model constructed from the structural alignment of a set of sequences from its family. Computing the optimal alignment between a sequence and a structure model is the core part of an algorithm that can search genomes for noncoding RNAs. 
In practice, a single structure model may not be sufficient to capture all crucial features important for a noncoding RNA family. In this paper, we develop a novel machine learning approach that can efficiently search genomes for noncoding RNAs with high accuracy. During the search procedure, a sequence segment in the searched genome sequence is processed and a feature vector is extracted to represent it. Based on the feature vector, a classifier is used to determine whether the sequence segment is the searched ncRNA or not. Our testing results show that this approach is able to efficiently capture crucial features of a noncoding RNA family. Compared with existing search tools, it significantly improves the accuracy of genome annotation.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Mehmood:2015:PLS, author = "Tahir Mehmood and Jon Bohlin and Lars Snipen", title = "A partial least squares based procedure for upstream sequence classification in prokaryotes", journal = j-TCBB, volume = "12", number = "3", pages = "560--567", month = may, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2366146", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The upstream region of coding genes is important for several reasons, for instance locating transcription factor, binding sites, and start site initiation in genomic DNA. Motivated by a recently conducted study, where multivariate approach was successfully applied to coding sequence modeling, we have introduced a partial least squares (PLS) based procedure for the classification of true upstream prokaryotic sequence from background upstream sequence. 
The upstream sequences of conserved coding genes over genomes were considered in analysis, where conserved coding genes were found by using pan-genomics concept for each considered prokaryotic species. PLS uses position specific scoring matrix (PSSM) to study the characteristics of upstream region. Results obtained by PLS based method were compared with Gini importance of random forest (RF) and support vector machine (SVM), which is much used method for sequence classification. The upstream sequence classification performance was evaluated by using cross validation, and suggested approach identifies prokaryotic upstream region significantly better to RF (p-value {$<$} 0.01) and SVM (p-value {$<$} 0.01). Further, the proposed method also produced results that concurred with known biological characteristics of the upstream region.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Dong:2015:ANA, author = "Liang Dong and Bing Shi and Guangdong Tian and YanBo Li and Bing Wang and MengChu Zhou", title = "An accurate de novo algorithm for glycan topology determination from mass spectra", journal = j-TCBB, volume = "12", number = "3", pages = "568--578", month = may, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2368981", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Determining the glycan topology automatically from mass spectra represents a great challenge. Existing methods fall into approximate and exact ones. The former including greedy and heuristic ones can reduce the computational complexity, but suffer from information lost in the procedure of glycan interpretation.
The latter including dynamic programming and exhaustive enumeration are much slower than the former. In the past years, nearly all emerging methods adopted a tree structure to represent a glycan. They share such problems as repetitive peak counting in reconstructing a candidate structure. Besides, tree-based glycan representation methods often have to give different computational formulas for binary and ternary glycans. We propose a new directed acyclic graph structure for glycan representation. Based on it, this work develops a de novo algorithm to accurately reconstruct the tree structure iteratively from mass spectra with logical constraints and some known biosynthesis rules, by a single computational formula. The experiments on multiple complex glycans extracted from human serum show that the proposed algorithm can achieve higher accuracy to determine a glycan topology than prior methods without increasing computational burden.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wang:2015:CNI, author = "Pei Wang and Jinhu L{\"u} and Xinghuo Yu", title = "Colored noise induced bistable switch in the genetic toggle switch systems", journal = j-TCBB, volume = "12", number = "3", pages = "579--589", month = may, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2368982", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Noise can induce various dynamical behaviors in nonlinear systems. White noise perturbed systems have been extensively investigated during the last decades. In gene networks, experimentally observed extrinsic noise is colored. 
As an attempt, we investigate the genetic toggle switch systems perturbed by colored extrinsic noise and with kinetic parameters. Compared with white noise perturbed systems, we show there also exists optimal colored noise strength to induce the best stochastic switch behaviors in the single toggle switch, and the best synchronized switching in the networked systems, which demonstrate that noise-induced optimal switch behaviors are widely in existence. Moreover, under a wide range of system parameter regions, we find there exist wider ranges of white and colored noises strengths to induce good switch and synchronization behaviors, respectively; therefore, white noise is beneficial for switch and colored noise is beneficial for population synchronization. Our observations are very robust to extrinsic stimulus strength, cell density, and diffusion rate. Finally, based on the Waddington's epigenetic landscape and the Wiener-Khintchine theorem, physical mechanisms underlying the observations are interpreted. 
Our investigations can provide guidelines for experimental design, and have potential clinical implications in gene therapy and synthetic biology.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Bhattacharyya:2015:CCS, author = "Sourya Bhattacharyya and Jayanta Mukherjee", title = "{COSPEDTree}: couplet supertree by equivalence partitioning of taxa set and {DAG} formation", journal = j-TCBB, volume = "12", number = "3", pages = "590--603", month = may, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2366778", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "From a set of phylogenetic trees with overlapping taxa set, a supertree exhibits evolutionary relationships among all input taxa. The key is to resolve the contradictory relationships with respect to input trees, between individual taxa subsets. Formulation of this NP hard problem employs either local search heuristics to reduce tree search space, or resolves the conflicts with respect to fixed or varying size subtree level decompositions. Different approximation techniques produce supertrees with considerable performance variations. Moreover, the majority of the algorithms involve high computational complexity, thus not suitable for use on large biological data sets. Current study presents COSPEDTree, a novel method for supertree construction. The technique resolves source tree conflicts by analyzing couplet (taxa pair) relationships for each source trees. Subsequently, individual taxa pairs are resolved with a single relation. 
To prioritize the consensus relations among individual taxa pairs for resolving them, greedy scoring is employed to assign higher score values for the consensus relations among a taxa pair. Selected set of relations resolving individual taxa pairs is subsequently used to construct a directed acyclic graph (DAG). Vertices of DAG represents a taxa subset inferred from the same speciation event. Thus, COSPEDTree can generate non-binary supertrees as well. Depth first traversal on this DAG yields final supertree. According to the performance metrics on branch dissimilarities (such as FP, FN and RF), COSPEDTree produces mostly conservative, well resolved supertrees. Specifically, RF metrics are mostly lower compared to the reference approaches, and FP values are lower apart from only strictly conservative (or veto) approaches. COSPEDTree has worst case time and space complexities of cubic and quadratic order, respectively, better or comparable to the reference approaches. Such high performance and low computational costs enable COSPEDTree to be applied on large scale biological data sets.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Gopinath:2015:DKD, author = "Krishnasamy Gopinath and Ramaraj Jayakumararaj and Muthusamy Karthikeyan", title = "{DAPD}: a knowledgebase for diabetes associated proteins", journal = j-TCBB, volume = "12", number = "3", pages = "604--610", month = may, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2359442", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Recent advancements in genomics and proteomics provide a solid foundation for understanding the pathogenesis of diabetes. 
Proteomics of diabetes associated pathways help to identify the most potent target for the management of diabetes. The relevant datasets are scattered in various prominent sources which takes much time to select the therapeutic target for the clinical management of diabetes. However, additional information about target proteins is needed for validation. This lacuna may be resolved by linking diabetes associated genes, pathways and proteins and it will provide a strong base for the treatment and planning management strategies of diabetes. Thus, a web source ``Diabetes Associated Proteins Database (DAPD)'' has been developed to link the diabetes associated genes, pathways and proteins using PHP, MySQL. The current version of DAPD has been built with proteins associated with different types of diabetes. In addition, DAPD has been linked to external sources to gain the access to more participatory proteins and their pathway network. DAPD will reduce the time and it is expected to pave the way for the discovery of novel anti-diabetic leads using computational drug designing for diabetes management. 
DAPD is open accessed via following url www.mkarthikeyan.bioinfoau.org/dapd.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Yu:2015:DCP, author = "Dong-Jun Yu and Yang Li and Jun Hu and Xibei Yang and Jing-Yu Yang and Hong-Bin Shen", title = "Disulfide connectivity prediction based on modelled protein {$3$D} structural information and random forest regression", journal = j-TCBB, volume = "12", number = "3", pages = "611--621", month = may, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2359451", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Disulfide connectivity is an important protein structural characteristic. Accurately predicting disulfide connectivity solely from protein sequence helps to improve the intrinsic understanding of protein structure and function, especially in the post-genome era where large volume of sequenced proteins without being functional annotated is quickly accumulated. In this study, a new feature extracted from the predicted protein 3D structural information is proposed and integrated with traditional features to form discriminative features. Based on the extracted features, a random forest regression model is performed to predict protein disulfide connectivity. We compare the proposed method with popular existing predictors by performing both cross-validation and independent validation tests on benchmark datasets. The experimental results demonstrate the superiority of the proposed method over existing predictors. We believe the superiority of the proposed method benefits from both the good discriminative capability of the newly developed features and the powerful modelling capability of the random forest. 
The web server implementation, called Target Disulfide, and the benchmark datasets are freely available at: http://csbio.njust.edu.cn/bioinf/TargetDisulfide for academic use.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Huang:2015:EMS, author = "Lei Huang and Li Liao and Cathy H. Wu", title = "Evolutionary model selection and parameter estimation for protein--protein interaction network based on differential evolution algorithm", journal = j-TCBB, volume = "12", number = "3", pages = "622--631", month = may, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2366748", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Revealing the underlying evolutionary mechanism plays an important role in understanding protein interaction networks in the cell. While many evolutionary models have been proposed, the problem about applying these models to real network data, especially for differentiating which model can better describe evolutionary process for the observed network remains a challenge. The traditional way is to use a model with presumed parameters to generate a network, and then evaluate the fitness by summary statistics, which however cannot capture the complete network structures information and estimate parameter distribution. In this work, we developed a novel method based on Approximate Bayesian Computation and modified Differential Evolution algorithm (ABC-DEP) that is capable of conducting model selection and parameter estimation simultaneously and detecting the underlying evolutionary mechanisms for PPI networks more accurately. 
We tested our method for its power in differentiating models and estimating parameters on simulated data and found significant improvement in performance benchmark, as compared with a previous method. We further applied our method to real data of protein interaction networks in human and yeast. Our results show duplication attachment model as the predominant evolutionary mechanism for human PPI networks and Scale-Free model as the predominant mechanism for yeast PPI networks.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Basu:2015:EIF, author = "Saurav Basu and Chi Liu and Gustavo Kunde Rohde", title = "Extraction of individual filaments from {$2$D} confocal microscopy images of flat cells", journal = j-TCBB, volume = "12", number = "3", pages = "632--643", month = may, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2372783", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A crucial step in understanding the architecture of cells and tissues from microscopy images, and consequently explain important biological events such as wound healing and cancer metastases, is the complete extraction and enumeration of individual filaments from the cellular cytoskeletal network. Current efforts at quantitative estimation of filament length distribution, architecture and orientation from microscopy images are predominantly limited to visual estimation and indirect experimental inference. Here we demonstrate the application of a new algorithm to reliably estimate centerlines of biological filament bundles and extract individual filaments from the centerlines by systematically disambiguating filament intersections. 
We utilize a filament enhancement step followed by reverse diffusion based filament localization and an integer programming based set combination to systematically extract accurate filaments automatically from microscopy images. Experiments on simulated and real confocal microscope images of flat cells (2D images) show efficacy of the new method.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ng:2015:FAL, author = "Yen Kaow Ng and Linzhi Yin and Hirotaka Ono and Shuai Cheng Li", title = "Finding all longest common segments in protein structures efficiently", journal = j-TCBB, volume = "12", number = "3", pages = "644--655", month = may, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2372782", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The Local/Global Alignment (Zemla, 2003), or LGA, is a popular method for the comparison of protein structures. One of the two components of LGA requires us to compute the longest common contiguous segments between two protein structures. That is, given two structures $ A = (a_1; \ldots {}; a_n) $ and $ B = (b_1; \ldots {}; b_n) $ where $ a_k, b_k \in R^3 $, we are to find, among all the segments $ f = (a_i; \ldots {}; a_j) $ and $ g = (b_i; \ldots {}; b_j) $ that fulfill a certain criterion regarding their similarity, those of the maximum length. We consider the following criteria: (1) the root mean squared deviation (RMSD) between $f$ and $g$ is to be within a given $ t \in R$; (2) $f$ and $g$ can be superposed such that for each $ k, i \leq k \leq j$, $ ||a_k - b_k|| \leq t$ for a given $ t \in R$. 
We give an algorithm of $ O(n \log n + n l)$ time complexity when the first requirement applies, where $l$ is the maximum length of the segments fulfilling the criterion. We show an FPTAS which, for any $ \epsilon \in R$, finds a segment of length at least $l$, but of RMSD up to $ (1 + \epsilon) t$, in $ O(n \log n + n / \epsilon)$ time. We propose an FPTAS which for any given $ \epsilon \in R$, finds all the segments $f$ and $g$ of the maximum length which can be superposed such that for each $ k, i \leq k \leq j$, $ ||a_k - b_k|| \leq (1 + \epsilon) t$, thus fulfilling the second requirement approximately. The algorithm has a time complexity of $ O(n \log^2 n / \epsilon^5)$ when consecutive points in $A$ are separated by the same distance (which is the case with protein structures). These worst-case runtime complexities are verified using C++ implementations of the algorithms, which we have made available at http://alcs.sourceforge.net/.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Gonzalez-Alvarez:2015:FPP, author = "David L. Gonz{\'a}lez-{\'A}lvarez and Miguel A. Vega-Rodr{\'\i}guez and {\'A}lvaro Rubio-Largo", title = "Finding patterns in protein sequences by using a hybrid multiobjective teaching learning based optimization algorithm", journal = j-TCBB, volume = "12", number = "3", pages = "656--666", month = may, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2369043", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Proteins are molecules that form the mass of living beings. 
These proteins exist in dissociated forms like amino-acids and carry out various biological functions, in fact, almost all body reactions occur with the participation of proteins. This is one of the reasons why the analysis of proteins has become a major issue in biology. In a more concrete way, the identification of conserved patterns in a set of related protein sequences can provide relevant biological information about these protein functions. In this paper, we present a novel algorithm based on teaching learning based optimization (TLBO) combined with a local search function specialized to predict common patterns in sets of protein sequences. This population-based evolutionary algorithm defines a group of individuals (solutions) that enhance their knowledge (quality) by means of different learning stages. Thus, if we correctly adapt it to the biological context of the mentioned problem, we can get an acceptable set of quality solutions. To evaluate the performance of the proposed technique, we have used six instances composed of different related protein sequences obtained from the PROSITE database. 
As we will see, the designed approach makes good predictions and improves the quality of the solutions found by other well-known biological tools.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Mazza:2015:FIA, author = "Tommaso Mazza and Caterina Fusilli and Chiara Saracino and Gianluigi Mazzoccoli and Francesca Tavano and Manlio Vinciguerra and Valerio Pazienza", title = "Functional impact of autophagy-related genes on the homeostasis and dynamics of pancreatic cancer cell lines", journal = j-TCBB, volume = "12", number = "3", pages = "667--678", month = may, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2371824", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Pancreatic cancer is a highly aggressive and chemotherapy-resistant malignant neoplasm. In basal condition, it is characterized by elevated autophagy activity, which is required for tumor growth and that correlates with treatment failure. We analyzed the expression of autophagy related genes in different pancreatic cancer cell lines. A correlation-based network analysis evidenced the sociality and topological roles of the autophagy-related genes after serum starvation. 
Structural and functional tests identified a core set of autophagy related genes, suggesting different scenarios of autophagic responses to starvation, which may be responsible for the clinical variations associated with pancreatic cancer pathogenesis.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Xia:2015:IGA, author = "Hong Xia and Yuanning Liu and Minghui Wang and Ao Li", title = "Identification of genomic aberrations in cancer subclones from heterogeneous tumor samples", journal = j-TCBB, volume = "12", number = "3", pages = "679--685", month = may, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2366114", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Tumor samples are usually heterogeneous, containing admixture of more than one kind of tumor subclones. Studies of genomic aberrations from heterogeneous tumor data are hindered by the mixed signal of tumor subclone cells. Most of the existing algorithms cannot distinguish contributions of different subclones from the measured single nucleotide polymorphism (SNP) array signals, which may cause erroneous estimation of genomic aberrations. Here, we have introduced a computational method, Cancer Heterogeneity Analysis from SNP-array Experiments (CHASE), to automatically detect subclone proportions and genomic aberrations from heterogeneous tumor samples. Our method is based on HMM, and incorporates EM algorithm to build a statistical model for modeling mixed signal of multiple tumor subclones. 
We tested the proposed approach on simulated datasets and two real datasets, and the results show that the proposed method can efficiently estimate tumor subclone proportions and recovery the genomic aberrations.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Song:2015:PBM, author = "Dandan Song and Jiaxing Chen and Guang Chen and Ning Li and Jin Li and Jun Fan and Dongbo Bu and Shuai Cheng Li", title = "Parameterized {BLOSUM} matrices for protein alignment", journal = j-TCBB, volume = "12", number = "3", pages = "686--694", month = may, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2366126", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Protein alignment is a basic step for many molecular biology researches. The BLOSUM matrices, especially BLOSUM62, are the de facto standard matrices for protein alignments. However, after widely utilization of the matrices for 15 years, programming errors were surprisingly found in the initial version of source codes for their generation. And amazingly, after bug correction, the ``intended'' BLOSUM62 matrix performs consistently worse than the ``miscalculated'' one. In this paper, we find linear relationships among the eigenvalues of the matrices and propose an algorithm to find optimal unified eigenvectors. With them, we can parameterize matrix BLOSUMx for any given variable x that could change continuously. We compare the effectiveness of our parameterized isentropic matrix with BLOSUM62. Furthermore, an iterative alignment and matrix selection process is proposed to adaptively find the best parameter and globally align two sequences. 
Experiments are conducted on aligning 13,667 families of Pfam database and on clustering MHC II protein sequences, whose improved accuracy demonstrates the effectiveness of our proposed method.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ding:2015:SHO, author = "Xiaojun Ding and Jianxin Wang and Alex Zelikovsky and Xuan Guo and Minzhu Xie and Yi Pan", title = "Searching high-order {SNP} combinations for complex diseases based on energy distribution difference", journal = j-TCBB, volume = "12", number = "3", pages = "695--704", month = may, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2363459", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Single nucleotide polymorphisms, a dominant type of genetic variants, have been used successfully to identify defective genes causing human single gene diseases. However, most common human diseases are complex diseases and caused by gene-gene and gene-environment interactions. Many SNP-SNP interaction analysis methods have been introduced but they are not powerful enough to discover interactions more than three SNPs. The paper proposes a novel method that analyzes all SNPs simultaneously. Different from existing methods, the method regards an individual's genotype data on a list of SNPs as a point with a unit of energy in a multi-dimensional space, and tries to find a new coordinate system where the energy distribution difference between cases and controls reaches the maximum. The method will find different multiple SNPs combinatorial patterns between cases and controls based on the new coordinate system. The experiment on simulated data shows that the method is efficient. 
The tests on the real data of age-related macular degeneration (AMD) disease show that it can find out more significant multi-SNP combinatorial patterns than existing methods.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Boareto:2015:SVR, author = "Marcelo Boareto and Jonatas Cesar and Vitor B. P. Leite and Nestor Caticha", title = "Supervised variational relevance learning, an analytic geometric feature selection with applications to omic datasets", journal = j-TCBB, volume = "12", number = "3", pages = "705--711", month = may, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2377750", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We introduce Supervised Variational Relevance Learning (Suvrel), a variational method to determine metric tensors to define distance based similarity in pattern classification, inspired in relevance learning. The variational method is applied to a cost function that penalizes large intraclass distances and favors small interclass distances. We find analytically the metric tensor that minimizes the cost function. Preprocessing the patterns by doing linear transformations using the metric tensor yields a dataset which can be more efficiently classified. We test our methods using publicly available datasets, for some standard classifiers. 
Among these datasets, two were tested by the MAQC-II project and, even without the use of further preprocessing, our results improve on their performance.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Hsu:2015:SBF, author = "Chih-Yuan Hsu and Zhen-Ming Pan and Rei-Hsing Hu and Chih-Chun Chang and Hsiao-Chun Cheng and Che Lin and Bor-Sen Chen", title = "Systematic biological filter design with a desired {I/O} filtering response based on promoter-{RBS} libraries", journal = j-TCBB, volume = "12", number = "3", pages = "712--725", month = may, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2372790", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In this study, robust biological filters with an external control to match a desired input/output (I/O) filtering response are engineered based on the well-characterized promoter-RBS libraries and a cascade gene circuit topology. In the field of synthetic biology, the biological filter system serves as a powerful detector or sensor to sense different molecular signals and produces a specific output response only if the concentration of the input molecular signal is higher or lower than a specified threshold. The proposed systematic design method of robust biological filters is summarized into three steps. Firstly, several well-characterized promoter-RBS libraries are established for biological filter design by identifying and collecting the quantitative and qualitative characteristics of their promoter-RBS components via nonlinear parameter estimation method. 
Then, the topology of synthetic biological filter is decomposed into three cascade gene regulatory modules, and an appropriate promoter-RBS library is selected for each module to achieve the desired I/O specification of a biological filter. Finally, based on the proposed systematic method, a robust externally tunable biological filter is engineered by searching the promoter-RBS component libraries and a control inducer concentration library to achieve the optimal reference match for the specified I/O filtering response.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Janga:2015:KDU, author = "Sarath Chandra Janga and Dongxiao Zhu and Jake Y. Chen and Mohammed J. Zaki", title = "Knowledge discovery using big data in biomedical systems", journal = j-TCBB, volume = "12", number = "4", pages = "726--728", month = jul, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2454551", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Sep 16 18:55:37 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The 13th International Workshop on Data Mining in Bioinformatics (BIOKDD'14) was organized in conjunction with the ACM SIGKDD International Conference on Knowledge Discovery and Data Mining on August 24, 2014 in New York, USA. It brought together international researchers in the interacting disciplines of data mining, systems biology, and bioinformatics at the Bloomberg Headquarters venue. The goal of this workshop is to encourage Knowledge Discovery and Data mining (KDD) researchers to take on the numerous challenges that Bioinformatics offers. 
This year, the workshop featured the theme of ``Knowledge discovery using big data in biological/biomedical systems''.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Koneru:2015:DCA, author = "Suvarna Vani Koneru and Bhavani S. Durga", title = "Divide and conquer approach to contact map overlap problem using {$2$D}-pattern mining of protein contact networks", journal = j-TCBB, volume = "12", number = "4", pages = "729--737", month = jul, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2394402", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Sep 16 18:55:37 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A novel approach to Contact Map Overlap (CMO) problem is proposed using the two dimensional clusters present in the contact maps. Each protein is represented as a set of the non-trivial clusters of contacts extracted from its contact map. The approach involves finding matching regions between the two contact maps using approximate 2D-pattern matching algorithm and dynamic programming technique. These matched pairs of small contact maps are submitted in parallel to a fast heuristic CMO algorithm. The approach facilitates parallelization at this level since all the pairs of contact maps can be submitted to the algorithm in parallel. Then, a merge algorithm is used in order to obtain the overall alignment. As a proof of concept, MSVNS, a heuristic CMO algorithm is used for global as well as local alignment. The divide and conquer approach is evaluated for two benchmark data sets that of Skolnick and Ding et al. 
It is interesting to note that along with achieving saving of time, better overlap is also obtained for certain protein folds.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Henriques:2015:BFP, author = "Rui Henriques and Sara C. Madeira", title = "Biclustering with flexible plaid models to unravel interactions between biological processes", journal = j-TCBB, volume = "12", number = "4", pages = "738--752", month = jul, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2388206", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Sep 16 18:55:37 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Genes can participate in multiple biological processes at a time and thus their expression can be seen as a composition of the contributions from the active processes. Biclustering under a plaid assumption allows the modeling of interactions between transcriptional modules or biclusters (subsets of genes with coherence across subsets of conditions) by assuming an additive composition of contributions in their overlapping areas. Despite the biological interest of plaid models, few biclustering algorithms consider plaid effects and, when they do, they place restrictions on the allowed types and structures of biclusters, and suffer from robustness problems by seizing exact additive matchings. We propose BiP (Biclustering using Plaid models), a biclustering algorithm with relaxations to allow expression levels to change in overlapping areas according to biologically meaningful assumptions (weighted and noise-tolerant composition of contributions). 
BiP can be used over existing biclustering solutions (seizing their benefits) as it is able to recover excluded areas due to unaccounted plaid effects and detect noisy areas non-explained by a plaid assumption, thus producing an explanatory model of overlapping transcriptional activity. Experiments on synthetic data support BiP's efficiency and effectiveness. The learned models from expression data unravel meaningful and non-trivial functional interactions between biological processes associated with putative regulatory modules.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Vogt:2015:USD, author = "Julia E. Vogt", title = "Unsupervised structure detection in biomedical data", journal = j-TCBB, volume = "12", number = "4", pages = "753--760", month = jul, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2394408", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Sep 16 18:55:37 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A major challenge in computational biology is to find simple representations of high-dimensional data that best reveal the underlying structure. In this work, we present an intuitive and easy-to-implement method based on ranked neighborhood comparisons that detects structure in unsupervised data. The method is based on ordering objects in terms of similarity and on the mutual overlap of nearest neighbors. This basic framework was originally introduced in the field of social network analysis to detect actor communities. We demonstrate that the same ideas can successfully be applied to biomedical data sets in order to reveal complex underlying structure. The algorithm is very efficient and works on distance data directly without requiring a vectorial embedding of data. 
Comprehensive experiments demonstrate the validity of this approach. Comparisons with state-of-the-art clustering methods show that the presented method outperforms hierarchical methods as well as density based clustering methods and model-based clustering. A further advantage of the method is that it simultaneously provides a visualization of the data. Especially in biomedical applications, the visualization of data can be used as a first pre-processing step when analyzing real world data sets to get an intuition of the underlying data structure. We apply this model to synthetic data as well as to various biomedical data sets which demonstrate the high quality and usefulness of the inferred structure.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Shibuya:2015:GEI, author = "Tetsuo Shibuya and Chuan Yi Tang and Paul Horton and Kiyoshi Asai", title = "Guest editorial for the {25th International Conference on Genome Informatics (GIW\slash ISCB-Asia 2014)}", journal = j-TCBB, volume = "12", number = "4", pages = "761--762", month = jul, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2439791", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Sep 16 18:55:37 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The International Conference on Genome Informatics, known as ``GIW'', is one of the longest running annual conferences in bioinformatics or computational biology. GIW has played an important role in the development of the bioinformatics community in the Asia-Pacific region since its establishment in 1990. The first GIW was held as a Japanese workshop (called the Genome Informatics Workshop) in Tokyo, Japan, in 1990. 
It has been held annually since then, switching from a domestic conference to an international one in 1993 and updating its official name to the current ``International Conference on Genome Informatics'' in 2001. GIW was held in Japan (Tokyo or Yokohama) until 2006, but since then GIW has been held in various locations in the Asia-Pacific region. The 18th GIW was held in Singapore in 2007. In the following years, GIW was held in Gold Coast, Australia (2008); Yokohama, Japan (2009); Hangzhou, China (2010); Busan, Korea (2011); Tainan, Taiwan (2012); and again in Singapore in 2013. With its long history and track record in attracting state-of-the art bioinformatics research in general, and especially algorithmic work, GIW is arguably not only the top bioinformatics conference in the Asia-Pacific region, but also one of the most important worldwide. Now solidly established as an international conference, GIW will be revisiting its birthplace, Tokyo, for 2014 and 2015.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Al-Jarrah:2015:RSL, author = "Omar Y. Al-Jarrah and Paul D. Yoo and Kamal Taha and Sami Muhaidat and Abdallah Shami and Nazar Zaki", title = "Randomized subspace learning for proline cis--trans isomerization prediction", journal = j-TCBB, volume = "12", number = "4", pages = "763--769", month = jul, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2369040", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Sep 16 18:55:37 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Proline residues are common source of kinetic complications during folding. The X-Pro peptide bond is the only peptide bond for which the stability of the cis and trans conformations is comparable. 
The cis--trans isomerization (CTI) of X-Pro peptide bonds is a widely recognized rate-limiting factor, which can not only induces additional slow phases in protein folding but also modifies the millisecond and sub-millisecond dynamics of the protein. An accurate computational prediction of proline CTI is of great importance for the understanding of protein folding, splicing, cell signaling, and transmembrane active transport in both the human body and animals. In our earlier work, we successfully developed a biophysically motivated proline CTI predictor utilizing a novel tree-based consensus model with a powerful metalearning technique and achieved 86.58 percent Q2 accuracy and 0.74 Mcc, which is a better result than the results (70-73 percent Q2 accuracies) reported in the literature on the well-referenced benchmark dataset. In this paper, we describe experiments with novel randomized subspace learning and bootstrap seeding techniques as an extension to our earlier work, the consensus models as well as entropy-based learning methods, to obtain better accuracy through a precise and robust learning scheme for proline CTI prediction.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Sung:2015:MLM, author = "Wing-Kin Sung and Kunihiko Sadakane and Tetsuo Shibuya and Abha Belorkar and Iana Pyrogova", title = "An {$ O(m \log m) $}-time algorithm for detecting superbubbles", journal = j-TCBB, volume = "12", number = "4", pages = "770--777", month = jul, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2385696", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Sep 16 18:55:37 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In genome assembly graphs, motifs such as tips, bubbles, and cross links are studied in order to 
find sequencing errors and to understand the nature of the genome. Superbubble, a complex generalization of bubbles, was recently proposed as an important subgraph class for analyzing assembly graphs. At present, a quadratic time algorithm is known. This paper gives an $ O(m \log m) $-time algorithm to solve this problem for a graph with $m$ edges.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Seok:2015:ESA, author = "Ho-Sik Seok and Taemin Song and Sek Won Kong and Kyu-Baek Hwang", title = "An efficient search algorithm for finding genomic-range overlaps based on the maximum range length", journal = j-TCBB, volume = "12", number = "4", pages = "778--784", month = jul, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2369042", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Sep 16 18:55:37 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Efficient search algorithms for finding genomic-range overlaps are essential for various bioinformatics applications. A majority of fast algorithms for searching the overlaps between a query range (e.g., a genomic variant) and a set of $N$ reference ranges (e.g., exons) has time complexity of $ O(k + \log N) $, where $k$ denotes a term related to the length and location of the reference ranges. Here, we present a simple but efficient algorithm that reduces $k$, based on the maximum reference range length. Specifically, for a given query range and the maximum reference range length, the proposed method divides the reference range set into three subsets: always, potentially, and never overlapping. Therefore, search effort can be reduced by excluding never overlapping subset. 
We demonstrate that the running time of the proposed algorithm is proportional to potentially overlapping subset size, that is proportional to the maximum reference range length if all the other conditions are the same. Moreover, an implementation of our algorithm was 13.8 to 30.0 percent faster than one of the fastest range search methods available when tested on various genomic-range data sets. The proposed algorithm has been incorporated into a disease-linked variant prioritization pipeline for WGS (http://gnome.tchlab.org) and its implementation is available at http://ml.ssu.ac.kr/gSearch.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Hsu:2015:CNE, author = "Yi-Yu Hsu and Hung-Yu Kao", title = "Curatable named-entity recognition using semantic relations", journal = j-TCBB, volume = "12", number = "4", pages = "785--792", month = jul, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2366770", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Sep 16 18:55:37 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Named-entity recognition (NER) plays an important role in the development of biomedical databases. However, the existing NER tools produce multifarious named-entities which may result in both curatable and non-curatable markers. To facilitate biocuration with a straightforward approach, classifying curatable named-entities is helpful with regard to accelerating the biocuration workflow. Co-occurrence Interaction Nexus with Named-entity Recognition (CoINNER) is a web-based tool that allows users to identify genes, chemicals, diseases, and action term mentions in the Comparative Toxicogenomic Database (CTD). 
To further discover interactions, CoINNER uses multiple advanced algorithms to recognize the mentions in the BioCreative IV CTD Track. CoINNER is developed based on a prototype system that annotated gene, chemical, and disease mentions in PubMed abstracts at BioCreative 2012 Track I (literature triage). We extended our previous system in developing CoINNER. The pre-tagging results of CoINNER were developed based on the state-of-the-art named entity recognition tools in BioCreative III. Next, a method based on conditional random fields (CRFs) is proposed to predict chemical and disease mentions in the articles. Finally, action term mentions were collected by latent Dirichlet allocation (LDA). At the BioCreative IV CTD Track, the best F-measures reached for gene/protein, chemical/drug and disease NER were 54 percent while CoINNER achieved a 61.5 percent F-measure. System URL: http://ikmbio.csie.ncku.edu.tw/coinner/introduction.htm.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Xu:2015:GEI, author = "Dong Xu and Kun Huang and Jeanette Schmidt", title = "Guest editors introduction to the special section on software and databases", journal = j-TCBB, volume = "12", number = "4", pages = "793--794", month = jul, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2454931", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Sep 16 18:55:37 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Software tools and information systems in bioinformatics and computational biology are playing more and more important roles in biology and medical research. This special section consists of a selection of papers focusing on software and databases that are central in bioinformatics and computational biology. 
Following a rigorous review process, 11 papers were selected for publication. These papers cover a broad range of topics, including computational genomics and transcriptomics, analysis of biological networks and interactions, drug design, biomedical signal/image analysis, biomedical text mining and ontologies, biological data mining, visualization and integration, and high performance computing application in bioinformatics.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Feng:2015:RES, author = "Weixing Feng and Peichao Sang and Deyuan Lian and Yansheng Dong and Fengfei Song and Meng Li and Bo He and Fenglin Cao and Yunlong Liu", title = "{ResSeq}: enhancing short-read sequencing alignment by rescuing error-containing reads", journal = j-TCBB, volume = "12", number = "4", pages = "795--798", month = jul, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2366103", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Sep 16 18:55:37 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Next-generation short-read sequencing is widely utilized in genomic studies. Biological applications require an alignment step to map sequencing reads to the reference genome, before acquiring expected genomic information. This requirement makes alignment accuracy a key factor for effective biological interpretation. Normally, when accounting for measurement errors and single nucleotide polymorphisms, short read mappings with a few mismatches are generally considered acceptable. However, to further improve the efficiency of short-read sequencing alignment, we propose a method to retrieve additional reliably aligned reads (reads with more than a pre-defined number of mismatches), using a Bayesian-based approach. 
In this method, we first retrieve the sequence context around the mismatched nucleotides within the already aligned reads; these loci contain the genomic features where sequencing errors occur. Then, using the derived pattern, we evaluate the remaining (typically discarded) reads with more than the allowed number of mismatches, and calculate a score that represents the probability that a specific alignment is correct. This strategy allows the extraction of more reliably aligned reads, therefore improving alignment sensitivity. Implementation: The source code of our tool, ResSeq, can be downloaded from: https://github.com/hrbeubiocenter/Resseq.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Segar:2015:MTD, author = "Matthew W. Segar and Cynthia J. Sakofsky and Anna Malkova and Yunlong Liu", title = "{MMBIRFinder}: a tool to detect microhomology-mediated break-induced replication", journal = j-TCBB, volume = "12", number = "4", pages = "799--806", month = jul, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2359450", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Sep 16 18:55:37 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The introduction of next-generation sequencing technologies has radically changed the way we view structural genetic events. Microhomology-mediated break-induced replication (MMBIR) is just one of the many mechanisms that can cause genomic destabilization that may lead to cancer. Although the mechanism for MMBIR remains unclear, it has been shown that MMBIR is typically associated with template-switching events. Currently, to our knowledge, there is no existing bioinformatics tool to detect these template-switching events. 
We have developed MMBIRFinder, a method that detects template-switching events associated with MMBIR from whole-genome sequenced data. MMBIRFinder uses a half-read alignment approach to identify potential regions of interest. Clustering of these potential regions helps narrow the search space to regions with strong evidence. Subsequent local alignments identify the template-switching events with single-nucleotide accuracy. Using simulated data, MMBIRFinder identified 83 percent of the MMBIR regions within a five nucleotide tolerance. Using real data, MMBIRFinder identified 16 MMBIR regions on a normal breast tissue data sample and 51 MMBIR regions on a triple-negative breast cancer tumor sample resulting in detection of 37 novel template-switching events. Finally, we identified template-switching events residing in the promoter region of seven genes that have been implicated in breast cancer. The program is freely available for download at https://github.com/msegar/MMBIRFinder.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Matthews:2015:HCL, author = "Suzanne J. Matthews", title = "Heterogeneous compression of large collections of evolutionary trees", journal = j-TCBB, volume = "12", number = "4", pages = "807--814", month = jul, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2366756", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Sep 16 18:55:37 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Compressing heterogeneous collections of trees is an open problem in computational phylogenetics. In a heterogeneous tree collection, each tree can contain a unique set of taxa. 
An ideal compression method would allow for the efficient archival of large tree collections and enable scientists to identify common evolutionary relationships over disparate analyses. In this paper, we extend TreeZip to compress heterogeneous collections of trees. TreeZip is the most efficient algorithm for compressing homogeneous tree collections. To the best of our knowledge, no other domain-based compression algorithm exists for large heterogeneous tree collections or enable their rapid analysis. Our experimental results indicate that TreeZip averages 89.03 percent (72.69 percent) space savings on unweighted (weighted) collections of trees when the level of heterogeneity in a collection is moderate. The organization of the TRZ file allows for efficient computations over heterogeneous data. For example, consensus trees can be computed in mere seconds. Lastly, combining the TreeZip compressed (TRZ) file with general-purpose compression yields average space savings of 97.34 percent (81.43 percent) on unweighted (weighted) collections of trees. 
Our results lead us to believe that TreeZip will prove invaluable in the efficient archival of tree collections, and enables scientists to develop novel methods for relating heterogeneous collections of trees.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wang:2015:CCA, author = "Jianxin Wang and Jiancheng Zhong and Gang Chen and Min Li and Fang-xiang Wu and Yi Pan", title = "{ClusterViz}: a {Cytoscape} {APP} for cluster analysis of biological network", journal = j-TCBB, volume = "12", number = "4", pages = "815--822", month = jul, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2361348", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Sep 16 18:55:37 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Cluster analysis of biological networks is one of the most important approaches for identifying functional modules and predicting protein functions. Furthermore, visualization of clustering results is crucial to uncover the structure of biological networks. In this paper, ClusterViz, an APP of Cytoscape 3 for cluster analysis and visualization, has been developed. In order to reduce complexity and enable extendibility for ClusterViz, we designed the architecture of ClusterViz based on the framework of Open Services Gateway Initiative. According to the architecture, the implementation of ClusterViz is partitioned into three modules including interface of ClusterViz, clustering algorithms and visualization and export. ClusterViz fascinates the comparison of the results of different algorithms to do further related analysis. Three commonly used clustering algorithms, FAG-EC, EAGLE and MCODE, are included in the current version. 
Due to adopting the abstract interface of algorithms in module of the clustering algorithms, more clustering algorithms can be included for the future use. To illustrate usability of ClusterViz, we provided three examples with detailed steps from the important scientific articles, which show that our tool has helped several research teams do their research work on the mechanism of the biological networks.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Nepomuceno-Chamorro:2015:BTA, author = "Isabel A. Nepomuceno-Chamorro and Alfonso Marquez-Chamorro and Jesus S. Aguilar-Ruiz", title = "Building transcriptional association networks in {Cytoscape} with {RegNetC}", journal = j-TCBB, volume = "12", number = "4", pages = "823--824", month = jul, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2385702", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Sep 16 18:55:37 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The Regression Network plugin for Cytoscape (RegNetC) implements the RegNet algorithm for the inference of transcriptional association network from gene expression profiles. This algorithm is a model tree-based method to detect the relationship between each gene and the remaining genes simultaneously instead of analyzing individually each pair of genes as correlation-based methods do. Model trees are a very useful technique to estimate the gene expression value by regression models and favours localized similarities over more global similarity, which is one of the major drawbacks of correlation-based methods. Here, we present an integrated software suite, named RegNetC, as a Cytoscape plugin that can operate on its own as well. 
RegNetC facilitates, according to user-defined parameters, the resulted transcriptional gene association network in .sif format for visualization, analysis and interoperates with other Cytoscape plugins, which can be exported for publication figures. In addition to the network, the RegNetC plugin also provides the quantitative relationships between genes expression values of those genes involved in the inferred network, i.e., those defined by the regression models.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Taha:2015:ISI, author = "Kamal Taha and Paul D. Yoo and Mohammed Alzaabi", title = "{iPFPi}: a system for improving protein function prediction through cumulative iterations", journal = j-TCBB, volume = "12", number = "4", pages = "825--836", month = jul, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2344681", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Sep 16 18:55:37 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We propose a classifier system called iPFPi that predicts the functions of un-annotated proteins. iPFPi assigns an un-annotated protein P the functions of GO annotation terms that are semantically similar to P. An un-annotated protein P and a GO annotation term T are represented by their characteristics. The characteristics of P are GO terms found within the abstracts of biomedical literature associated with P. The characteristics of T are GO terms found within the abstracts of biomedical literature associated with the proteins annotated with the function of T. Let F and F ' be the important (dominant) sets of characteristic terms representing T and P, respectively. iPFPi would annotate P with the function of T, if F and F ' are semantically similar. 
We constructed a novel semantic similarity measure that takes into consideration several factors, such as the dominance degree of each characteristic term t in set F based on its score, which is a value that reflects the dominance status of t relative to other characteristic terms, using pairwise beats and looses procedure. Every time a protein P is annotated with the function of T, iPFPi updates and optimizes the current scores of the characteristic terms for T based on the weights of the characteristic terms for P. Set F will be updated accordingly. Thus, the accuracy of predicting the function of T as the function of subsequent proteins improves. This prediction accuracy keeps improving over time iteratively through the cumulative weights of the characteristic terms representing proteins that are successively annotated with the function of T. We evaluated the quality of iPFPi by comparing it experimentally with two recent protein function prediction systems. Results showed marked improvement.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Chicco:2015:SSG, author = "Davide Chicco and Marco Masseroli", title = "Software suite for gene and protein annotation prediction and similarity search", journal = j-TCBB, volume = "12", number = "4", pages = "837--843", month = jul, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2382127", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Sep 16 18:55:37 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In the computational biology community, machine learning algorithms are key instruments for many applications, including the prediction of gene-functions based upon the available biomolecular annotations. 
Additionally, they may also be employed to compute similarity between genes or proteins. Here, we describe and discuss a software suite we developed to implement and make publicly available some of such prediction methods and a computational technique based upon Latent Semantic Indexing (LSI), which leverages both inferred and available annotations to search for semantically similar genes. The suite consists of three components. BioAnnotationPredictor is a computational software module to predict new gene-functions based upon Singular Value Decomposition of available annotations. SimilBio is a Web module that leverages annotations available or predicted by BioAnnotationPredictor to discover similarities between genes via LSI. The suite includes also SemSim, a new Web service built upon these modules to allow accessing them programmatically. We integrated SemSim in the Bio Search Computing framework (http://www.bioinformatics.deib.polimi.it/bio-seco/seco/), where users can exploit the Search Computing technology to run multi-topic complex queries on multiple integrated Web services. 
Accordingly, researchers may obtain ranked answers involving the computation of the functional similarity between genes in support of biomedical knowledge discovery.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Xiang:2015:RAA, author = "Yan-Ping Xiang and Ke Liu and Xian-Ying Cheng and Cheng Cheng and Fang Gong and Jian-Bo Pan and Zhi-Liang Ji", title = "Rapid assessment of adverse drug reactions by statistical solution of gene association network", journal = j-TCBB, volume = "12", number = "4", pages = "844--850", month = jul, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2338292", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Sep 16 18:55:37 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Adverse drug reaction (ADR) is a common clinical problem, sometimes accompanying with high risk of mortality and morbidity. It is also one of the major factors that lead to failure in new drug development. Unfortunately, most of current experimental and computational methods are unable to evaluate clinical safety of drug candidates in early drug discovery stage due to the very limited knowledge of molecular mechanisms underlying ADRs. Therefore, in this study, we proposed a novel na{\"\i}ve Bayesian model for rapid assessment of clinical ADRs with frequency estimation. This model was constructed on a gene-ADR association network, which covered 611 US FDA approved drugs, 14,251 genes, and 1,254 distinct ADR terms. An average detection rate of 99.86 and 99.73 percent were achieved eventually in identification of known ADRs in internal test data set and external case analyses respectively. Moreover, a comparative analysis between the estimated frequencies of ADRs and their observed frequencies was undertaken. 
It is observed that these two frequencies have the similar distribution trend. These results suggest that the na{\"\i}ve Bayesian model based on gene-ADR association network can serve as an efficient and economic tool in rapid ADRs assessment.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Barma:2015:QMS, author = "Shovan Barma and Bo-Wei Chen and Ka Lok Man and Jhing-Fa Wang", title = "Quantitative measurement of split of the second heart sound ({S2})", journal = j-TCBB, volume = "12", number = "4", pages = "851--860", month = jul, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2351804", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Sep 16 18:55:37 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This study proposes a quantitative measurement of split of the second heart sound (S2) based on nonstationary signal decomposition to deal with overlaps and energy modeling of the subcomponents of S2. The second heart sound includes aortic (A2) and pulmonic (P2) closure sounds. However, the split detection is obscured due to A2-P2 overlap and low energy of P2. To identify such split, HVD method is used to decompose the S2 into a number of components while preserving the phase information. Further, A2s and P2s are localized using smoothed pseudo Wigner-Ville distribution followed by reassignment method. Finally, the split is calculated by taking the differences between the means of time indices of A2s and P2s. Experiments on total 33 clips of S2 signals are performed for evaluation of the method. The mean $ \pm $ standard deviation of the split is $ 34.7 \pm 4.6 $ ms. 
The method measures the split efficiently, even when A2-P2 overlap is $ \leq 20 $ ms and the normalized peak temporal ratio of P2 to A2 is low ($ \geq 0.22 $). This proposed method thus, demonstrates its robustness by defining split detectability (SDT), the split detection aptness through detecting P2s, by measuring up to 96 percent. Such findings reveal the effectiveness of the method as competent against the other baselines, especially for A2-P2 overlaps and low energy P2.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Golshan:2015:OLB, author = "Hosein M. Golshan and Reza P. R. Hasanzadeh", title = "An optimized {LMMSE} based method for {$3$D} {MRI} denoising", journal = j-TCBB, volume = "12", number = "4", pages = "861--870", month = jul, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2344675", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Sep 16 18:55:37 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Post-acquisition denoising of magnetic resonance (MR) images is an important step to improve any quantitative measurement of the acquired data. In this paper, assuming a Rician noise model, a new filtering method based on the linear minimum mean square error (LMMSE) estimation is introduced, which employs the self-similarity property of the MR data to restore the noise-less signal. This method takes into account the structural characteristics of images and the Bayesian mean square error (Bmse) of the estimator to address the denoising problem. In general, a twofold data processing approach is developed; first, the noisy MR data is processed using a patch-based $ L^2 $-norm similarity measure to provide the primary set of samples required for the estimation process. 
Afterwards, the Bmse of the estimator is derived as the optimization function to analyze the pre-selected samples and minimize the error between the estimated and the underlying signal. Compared to the LMMSE method and also its recently proposed SNR-adapted realization (SNLMMSE), the optimized way of choosing the samples together with the automatic adjustment of the filtering parameters lead to a more robust estimation performance with our approach. Experimental results show the competitive performance of the proposed method in comparison with related state-of-the-art methods.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Varghese:2015:RLW, author = "Blesson Varghese and Ishan Patel and Adam Barker", title = "{RBioCloud}: a light-weight framework for {Bioconductor} and {R}-based jobs on the cloud", journal = j-TCBB, volume = "12", number = "4", pages = "871--878", month = jul, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2361327", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Sep 16 18:55:37 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/s-plus.bib; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Large-scale ad hoc analytics of genomic data is popular using the R-programming language supported by over 700 software packages provided by Bioconductor. More recently, analytical jobs are benefitting from on-demand computing and storage, their scalability and their low maintenance cost, all of which are offered by the cloud. While biologists and bioinformaticists can take an analytical job and execute it on their personal workstations, it remains challenging to seamlessly execute the job on the cloud infrastructure without extensive knowledge of the cloud dashboard. 
How analytical jobs can not only with minimum effort be executed on the cloud, but also how both the resources and data required by the job can be managed is explored in this paper. An open-source light-weight framework for executing R-scripts using Bioconductor packages, referred to as `RBioCloud', is designed and developed. RBioCloud offers a set of simple command-line tools for managing the cloud resources, the data and the execution of the job. Three biological test cases validate the feasibility of RBioCloud. The framework is available from http://www.rbiocloud.com.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhang:2015:NMD, author = "Wei Zhang and Xiufen Zou", title = "A new method for detecting protein complexes based on the three node cliques", journal = j-TCBB, volume = "12", number = "4", pages = "879--886", month = jul, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2386314", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Sep 16 18:55:37 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The identification of protein complexes in protein-protein interaction (PPI) networks is fundamental for understanding biological processes and cellular molecular mechanisms. Many graph computational algorithms have been proposed to identify protein complexes from PPI networks by detecting densely connected groups of proteins. These algorithms assess the density of subgraphs through evaluation of the sum of individual edges or nodes; thus, incomplete and inaccurate measures may miss meaningful biological protein complexes with functional significance. In this study, we propose a novel method for assessing the compactness of local subnetworks by measuring the number of three node cliques. 
The present method detects each optimal cluster by growing a seed and maximizing the compactness function. To demonstrate the efficacy of the new proposed method, we evaluate its performance using five PPI networks on three reference sets of yeast protein complexes with five different measurements and compare the performance of the proposed method with four state-of-the-art methods. The results show that the protein complexes generated by the proposed method are of better quality than those generated by four classic methods. Therefore, the new proposed method is effective and useful for detecting protein complexes in PPI networks.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Yu:2015:AFC, author = "Zhiwen Yu and Hantao Chen and Jane You and Jiming Liu and Hau-San Wong and Guoqiang Han and Le Li", title = "Adaptive fuzzy consensus clustering framework for clustering analysis of cancer data", journal = j-TCBB, volume = "12", number = "4", pages = "887--901", month = jul, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2359433", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Sep 16 18:55:37 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Performing clustering analysis is one of the important research topics in cancer discovery using gene expression profiles, which is crucial in facilitating the successful diagnosis and treatment of cancer. While there are quite a number of research works which perform tumor clustering, few of them considers how to incorporate fuzzy theory together with an optimization process into a consensus clustering framework to improve the performance of clustering analysis. 
In this paper, we first propose a random double clustering based cluster ensemble framework (RDCCE) to perform tumor clustering based on gene expression data. Specifically, RDCCE generates a set of representative features using a randomly selected clustering algorithm in the ensemble, and then assigns samples to their corresponding clusters based on the grouping results. In addition, we also introduce the random double clustering based fuzzy cluster ensemble framework (RDCFCE), which is designed to improve the performance of RDCCE by integrating the newly proposed fuzzy extension model into the ensemble framework. RDCFCE adopts the normalized cut algorithm as the consensus function to summarize the fuzzy matrices generated by the fuzzy extension models, partition the consensus matrix, and obtain the final result. Finally, adaptive RDCFCE (A-RDCFCE) is proposed to optimize RDCFCE and improve the performance of RDCFCE further by adopting a self-evolutionary process (SEPP) for the parameter set. 
Experiments on real cancer gene expression profiles indicate that RDCFCE and A-RDCFCE works well on these data sets, and outperform most of the state-of-the-art tumor clustering algorithms.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Deng:2015:IFF, author = "Lei Deng and Zhigang Chen", title = "An integrated framework for functional annotation of protein structural domains", journal = j-TCBB, volume = "12", number = "4", pages = "902--913", month = jul, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2389213", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Sep 16 18:55:37 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Structural domains are evolutionary and functional units of proteins and play a critical role in comparative and functional genomics. Computational assignment of domain function with high reliability is essential for understanding whole-protein functions. However, functional annotations are conventionally assigned onto full-length proteins rather than associating specific functions to the individual structural domains. In this article, we present Structural Domain Annotation (SDA), a novel computational approach to predict functions for SCOP structural domains. The SDA method integrates heterogeneous information sources, including structure alignment based protein-SCOP mapping features, InterPro2GO mapping information, PSSM Profiles, and sequence neighborhood features, with a Bayesian network. By large-scale annotating Gene Ontology terms to SCOP domains with SDA, we obtained a database of SCOP domain to Gene Ontology mappings, which contains {$\sim$}162,000 out of the approximately 166,900 domains in SCOPe 2.03 ({$>$97} percent) and their predicted Gene Ontology functions. 
We have benchmarked SDA using a single-domain protein dataset and an independent dataset from different species. Comparative studies show that SDA significantly outperforms the existing function prediction methods for structural domains in terms of coverage and maximum F-measure.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ranjbar:2015:BNM, author = "Mohammad R. Nezami Ranjbar and Mahlet G. Tadesse and Yue Wang and Habtom W. Ressom", title = "{Bayesian} normalization model for label-free quantitative analysis by {LC--MS}", journal = j-TCBB, volume = "12", number = "4", pages = "914--927", month = jul, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2377723", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Sep 16 18:55:37 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We introduce a new method for normalization of data acquired by liquid chromatography coupled with mass spectrometry (LC--MS) in label-free differential expression analysis. Normalization of LC--MS data is desired prior to subsequent statistical analysis to adjust variabilities in ion intensities that are not caused by biological differences but experimental bias. There are different sources of bias including variabilities during sample collection and sample storage, poor experimental design, noise, etc. In addition, instrument variability in experiments involving a large number of LC--MS runs leads to a significant drift in intensity measurements. Although various methods have been proposed for normalization of LC--MS data, there is no universally applicable approach. In this paper, we propose a Bayesian normalization model (BNM) that utilizes scan-level information from LC--MS data. 
Specifically, the proposed method uses peak shapes to model the scan-level data acquired from extracted ion chromatograms (EIC) with parameters considered as a linear mixed effects model. We extended the model into BNM with drift (BNMD) to compensate for the variability in intensity measurements due to long LC--MS runs. We evaluated the performance of our method using synthetic and experimental data. In comparison with several existing methods, the proposed BNM and BNMD yielded significant improvement.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Liang:2015:IDA, author = "Muxuan Liang and Zhizhong Li and Ting Chen and Jianyang Zeng", title = "Integrative data analysis of multi-platform cancer data with a multimodal deep learning approach", journal = j-TCBB, volume = "12", number = "4", pages = "928--937", month = jul, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2377729", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Sep 16 18:55:37 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Identification of cancer subtypes plays an important role in revealing useful insights into disease pathogenesis and advancing personalized therapy. The recent development of high-throughput sequencing technologies has enabled the rapid collection of multi-platform genomic data (e.g., gene expression, miRNA expression, and DNA methylation) for the same set of tumor samples. Although numerous integrative clustering approaches have been developed to analyze cancer data, few of them are particularly designed to exploit both deep intrinsic statistical properties of each input modality and complex cross-modality correlations among multi-platform input data. 
In this paper, we propose a new machine learning model, called multimodal deep belief network (DBN), to cluster cancer patients from multi-platform observation data. In our integrative clustering framework, relationships among inherent features of each single modality are first encoded into multiple layers of hidden variables, and then a joint latent model is employed to fuse common features derived from multiple input modalities. A practical learning algorithm, called contrastive divergence (CD), is applied to infer the parameters of our multimodal DBN model in an unsupervised manner. Tests on two available cancer datasets show that our integrative data analysis approach can effectively extract a unified representation of latent features to capture both intra- and cross-modality correlations, and identify meaningful disease subtypes from multi-platform cancer data. In addition, our approach can identify key genes and miRNAs that may play distinct roles in the pathogenesis of different cancer subtypes. Among those key miRNAs, we found that the expression level of miR-29a is highly correlated with survival time in ovarian cancer patients. These results indicate that our multimodal DBN based data analysis approach may have practical applications in cancer pathogenesis studies and provide useful guidelines for personalized cancer therapy.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Dehghannasiri:2015:OED, author = "Roozbeh Dehghannasiri and Byung-Jun Yoon and Edward R. 
Dougherty", title = "Optimal experimental design for gene regulatory networks in the presence of uncertainty", journal = j-TCBB, volume = "12", number = "4", pages = "938--950", month = jul, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2377733", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Sep 16 18:55:37 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Of major interest to translational genomics is the intervention in gene regulatory networks (GRNs) to affect cell behavior; in particular, to alter pathological phenotypes. Owing to the complexity of GRNs, accurate network inference is practically challenging and GRN models often contain considerable amounts of uncertainty. Considering the cost and time required for conducting biological experiments, it is desirable to have a systematic method for prioritizing potential experiments so that an experiment can be chosen to optimally reduce network uncertainty. Moreover, from a translational perspective it is crucial that GRN uncertainty be quantified and reduced in a manner that pertains to the operational cost that it induces, such as the cost of network intervention. In this work, we utilize the concept of mean objective cost of uncertainty (MOCU) to propose a novel framework for optimal experimental design. In the proposed framework, potential experiments are prioritized based on the MOCU expected to remain after conducting the experiment. Based on this prioritization, one can select an optimal experiment with the largest potential to reduce the pertinent uncertainty present in the current network model. 
We demonstrate the effectiveness of the proposed method via extensive simulations based on synthetic and real gene regulatory networks.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Chen:2015:SDI, author = "Ting Chen and Ulisses M. Braga-Neto", title = "Statistical detection of intrinsically multivariate predictive genes", journal = j-TCBB, volume = "12", number = "4", pages = "951--963", month = jul, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2377731", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Sep 16 18:55:37 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Canalizing genes possess broad regulatory power over a wide swath of regulatory processes. On the other hand, it has been hypothesized that the phenomenon of intrinsically multivariate prediction (IMP) is associated with canalization. However, applications have relied on user-selectable thresholds on the IMP score to decide on the presence of IMP. A methodology is developed here that avoids arbitrary thresholds, by providing a statistical test for the IMP score. In addition, the proposed procedure allows the incorporation of prior knowledge if available, which can alleviate the problem of loss of power due to small sample sizes. The issue of multiplicity of tests is addressed by family-wise error rate (FWER) and false discovery rate (FDR) controlling approaches. The proposed methodology is demonstrated by experiments using synthetic and real gene-expression data from studies on melanoma and ionizing radiation (IR) responsive genes. The results with the real data identified DUSP1 and p53, two well-known canalizing genes associated with melanoma and IR response, respectively, as the genes with a clear majority of IMP predictor pairs. 
This validates the potential of the proposed methodology as a tool for discovery of canalizing genes from binary gene-expression data. The procedure is made available through an R package.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Liu:2015:RBT, author = "Jin-Xing Liu and Yong Xu and Chun-Hou Zheng and Heng Kong and Zhi-Hui Lai", title = "{RPCA}-based tumor classification using gene expression data", journal = j-TCBB, volume = "12", number = "4", pages = "964--970", month = jul, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2383375", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Sep 16 18:55:37 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Microarray techniques have been used to delineate cancer groups or to identify candidate genes for cancer prognosis. As such problems can be viewed as classification ones, various classification methods have been applied to analyze or interpret gene expression data. In this paper, we propose a novel method based on robust principal component analysis (RPCA) to classify tumor samples of gene expression data. Firstly, RPCA is utilized to highlight the characteristic genes associated with a special biological process. Then, RPCA and RPCA+LDA (robust principal component analysis and linear discriminant analysis) are used to identify the features. Finally, support vector machine (SVM) is applied to classify the tumor samples of gene expression data based on the identified features. 
Experiments on seven data sets demonstrate that our methods are effective and feasible for tumor classification.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Gomez-Pulido:2015:ABA, author = "Juan A. Gomez-Pulido and Bertil Schmidt and Wu-chun Feng", title = "Accelerating bioinformatics applications via emerging parallel computing systems", journal = j-TCBB, volume = "12", number = "5", pages = "971--972", month = sep, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2457736", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Dec 8 06:52:41 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Fernandez:2015:FFB, author = "Edward B. Fernandez and Jason Villarreal and Stefano Lonardi and Walid A. 
Najjar", title = "{FHAST}: {FPGA}-Based Acceleration of {Bowtie} in Hardware", journal = j-TCBB, volume = "12", number = "5", pages = "973--981", month = sep, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2405333", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Dec 8 06:52:41 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Gonzalez-Dominguez:2015:PED, author = "Jorge Gonzalez-Dominguez and Lars Wienbrandt and Jan Christian Kassens and David Ellinghaus and Manfred Schimmler and Bertil Schmidt", title = "Parallelizing Epistasis Detection in {GWAS} on {FPGA} and {GPU}-Accelerated Computing Systems", journal = j-TCBB, volume = "12", number = "5", pages = "982--994", month = sep, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2389958", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Dec 8 06:52:41 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Martinez:2015:CAS, author = "Hector Martinez and Joaquin Tarraga and Ignacio Medina and Sergio Barrachina and Maribel Castillo and Joaquin Dopazo and Enrique S. 
Quintana-Orti", title = "Concurrent and Accurate Short Read Mapping on Multicore Processors", journal = j-TCBB, volume = "12", number = "5", pages = "995--1007", month = sep, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2392077", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Dec 8 06:52:41 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Misra:2015:PMI, author = "Sanchit Misra and Kiran Pamnany and Srinivas Aluru", title = "Parallel Mutual Information Based Construction of Genome-Scale Networks on the {Intel Xeon Phi} Coprocessor", journal = j-TCBB, volume = "12", number = "5", pages = "1008--1020", month = sep, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2415931", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Dec 8 06:52:41 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Jeannin-Girardon:2015:LST, author = "Anne Jeannin-Girardon and Pascal Ballet and Vincent Rodin", title = "Large Scale Tissue Morphogenesis Simulation on Heterogeneous Systems Based on a Flexible Biomechanical Cell Model", journal = j-TCBB, volume = "12", number = "5", pages = "1021--1033", month = sep, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2418994", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Dec 8 06:52:41 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = 
ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Xin:2015:ASI, author = "Yao Xin and Will X. Y. Li and Zhaorui Zhang and Ray C. C. Cheung and Dong Song and Theodore W. Berger", title = "An Application Specific Instruction Set Processor {(ASIP)} for Adaptive Filters in Neural Prosthetics", journal = j-TCBB, volume = "12", number = "5", pages = "1034--1047", month = sep, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2440248", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Dec 8 06:52:41 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Chacon:2015:BFI, author = "Alejandro Chacon and Santiago Marco-Sola and Antonio Espinosa and Paolo Ribeca and Juan Carlos Moure", title = "Boosting the {FM-Index} on the {GPU}: Effective Techniques to Mitigate Random Memory Access", journal = j-TCBB, volume = "12", number = "5", pages = "1048--1059", month = sep, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2377716", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Dec 8 06:52:41 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Nguyen:2015:EAO, author = "Thuy-Diem Nguyen and Bertil Schmidt and Zejun Zheng and Chee-Keong Kwoh", title = "Efficient and Accurate {OTU} Clustering with {GPU}-Based Sequence Alignment and Dynamic Dendrogram Cutting", journal = j-TCBB, volume = "12", number = "5", 
pages = "1060--1073", month = sep, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2407574", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Dec 8 06:52:41 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhang:2015:GES, author = "Shihua Zhang and Luonan Chen", title = "Guest Editorial for Special Section on {ISB\slash TBC 2014}", journal = j-TCBB, volume = "12", number = "5", pages = "1074--1075", month = sep, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2443211", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Dec 8 06:52:41 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Frost:2015:IFG, author = "H. Robert Frost and Zhigang Li and Folkert W. Asselbergs and Jason H. 
Moore", title = "An Independent Filter for Gene Set Testing Based on Spectral Enrichment", journal = j-TCBB, volume = "12", number = "5", pages = "1076--1086", month = sep, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2415815", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Dec 8 06:52:41 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Chu:2015:EPY, author = "Dominique Chu and Anton Salykin", title = "Evolutionary Pressures on the Yeast Transcriptome", journal = j-TCBB, volume = "12", number = "5", pages = "1087--1093", month = sep, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2420554", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Dec 8 06:52:41 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Kang:2015:ISO, author = "Hao Kang and Kwang-Hyun Cho and Xiaohua Douglas Zhang and Tao Zeng and Luonan Chen", title = "Inferring Sequential Order of Somatic Mutations during Tumorgenesis based on {Markov} Chain Model", journal = j-TCBB, volume = "12", number = "5", pages = "1094--1103", month = sep, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2424408", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Dec 8 06:52:41 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", 
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Meng:2015:ICG, author = "Nan Meng and Raghu Machiraju and Kun Huang", title = "Identify Critical Genes in Development with Consistent {H3K4me2} Patterns across Multiple Tissues", journal = j-TCBB, volume = "12", number = "5", pages = "1104--1111", month = sep, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2430340", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Dec 8 06:52:41 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Qiu:2015:IMA, author = "Yu-Qing Qiu and Xue Tian and Shihua Zhang", title = "Infer Metagenomic Abundance and Reveal Homologous Genomes Based on the Structure of Taxonomy Tree", journal = j-TCBB, volume = "12", number = "5", pages = "1112--1122", month = sep, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2415814", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Dec 8 06:52:41 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Leoncini:2015:CCT, author = "Mauro Leoncini and Manuela Montangero and Marco Pellegrini and Karina Panucia Tillan", title = "{CMStalker}: a Combinatorial Tool for Composite Motif Discovery", journal = j-TCBB, volume = "12", number = "5", pages = "1123--1136", month = sep, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2359444", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Dec 8 06:52:41 MST 2015", 
bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zrimec:2015:FPD, author = "Jan Zrimec and Ales Lapanje", title = "Fast Prediction of {DNA} Melting Bubbles Using {DNA} Thermodynamic Stability", journal = j-TCBB, volume = "12", number = "5", pages = "1137--1145", month = sep, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2396057", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Dec 8 06:52:41 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhang:2015:FMA, author = "Shuqin Zhang and Hongyu Zhao and Michael K. 
Ng", title = "Functional Module Analysis for Gene Coexpression Networks with Network Integration", journal = j-TCBB, volume = "12", number = "5", pages = "1146--1160", month = sep, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2396073", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Dec 8 06:52:41 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Kim:2015:IGN, author = "Jeong-Rae Kim and Sang-Mok Choo and Hyung-Seok Choi and Kwang-Hyun Cho", title = "Identification of Gene Networks with Time Delayed Regulation Based on Temporal Expression Profiles", journal = j-TCBB, volume = "12", number = "5", pages = "1161--1168", month = sep, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2394312", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Dec 8 06:52:41 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Lo:2015:ITD, author = "Leung-Yau Lo and Kwong-Sak Leung and Kin-Hong Lee", title = "Inferring Time-Delayed Causal Gene Network Using Time-Series Expression Data", journal = j-TCBB, volume = "12", number = "5", pages = "1169--1182", month = sep, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2394442", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Dec 8 06:52:41 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on 
Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Kleftogiannis:2015:YNE, author = "Dimitrios Kleftogiannis and Konstantinos Theofilatos and Spiros Likothanassis and Seferina Mavroudi", title = "{YamiPred}: a Novel Evolutionary Method for Predicting {Pre-miRNAs} and Selecting Relevant Features", journal = j-TCBB, volume = "12", number = "5", pages = "1183--1192", month = sep, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2388227", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Dec 8 06:52:41 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Lovato:2015:MAP, author = "Pietro Lovato and Alejandro Giorgetti and Manuele Bicego", title = "A Multimodal Approach for Protein Remote Homology Detection", journal = j-TCBB, volume = "12", number = "5", pages = "1193--1198", month = sep, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2424417", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Dec 8 06:52:41 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Bordon:2015:FLC, author = "Jure Bordon and Miha Moskon and Nikolaj Zimic and Miha Mraz", title = "Fuzzy Logic as a Computational Tool for Quantitative Modelling of Biological Systems with Uncertain Kinetic Data", journal = j-TCBB, volume = "12", number = "5", pages = "1199--1205", month = sep, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2424424", ISSN 
= "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Dec 8 06:52:41 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Chen:2015:SGC, author = "Po-Kuei Chen and Chun-Liang Lin", title = "Synthesis of Genetic Clock with Combinational Biologic Circuits", journal = j-TCBB, volume = "12", number = "5", pages = "1206--1212", month = sep, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2396060", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Dec 8 06:52:41 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Athanasiadis:2015:ZAM, author = "Emmanouil I. Athanasiadis and Marilena M. Bourdakou and George M. 
Spyrou", title = "{ZoomOut}: Analyzing Multiple Networks as Single Nodes", journal = j-TCBB, volume = "12", number = "5", pages = "1213--1216", month = sep, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2424411", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Dec 8 06:52:41 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Huang:2015:GEA, author = "Hsien-Da Huang and Yi-Ping Phoebe Chen", title = "Guest Editorial for the {13th Asia Pacific Bioinformatics Conference}", journal = j-TCBB, volume = "12", number = "6", pages = "1217--1218", month = nov, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2451231", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 15 05:57:23 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Cheng:2015:USA, author = "Liang Cheng and Jie Li and Yang Hu and Yue Jiang and Yongzhuang Liu and Yanshuo Chu and Zhenxing Wang and Yadong Wang", title = "Using Semantic Association to Extend and Infer Literature-Oriented Relativity Between Terms", journal = j-TCBB, volume = "12", number = "6", pages = "1219--1226", month = nov, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2430289", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 15 05:57:23 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Relative terms often appear together in the 
literature. Methods have been presented for weighting relativity of pairwise terms by their co-occurring literature and inferring new relationship. Terms in the literature are also in the directed acyclic graph of ontologies, such as Gene Ontology and Disease Ontology. Therefore, semantic association between terms may help for establishing relativities between terms in literature. However, current methods do not use these associations. In this paper, an adjusted R-scaled score (ARSS) based on information content (ARSSIC) method is introduced to infer new relationship between terms. First, set inclusion relationship between terms of ontology was exploited to extend relationships between these terms and literature. Next, the ARSS method was presented to measure relativity between terms across ontologies according to these extensional relationships. Then, the ARSSIC method using ratios of information shared of term's ancestors was designed to infer new relationship between terms across ontologies. The result of the experiment shows that ARSS identified more pairs of statistically significant terms based on corresponding gene sets than other methods. And the high average area under the receiver operating characteristic curve (0.9293) shows that ARSSIC achieved a high true positive rate and a low false positive rate. 
Data is available at http://mlg.hit.edu.cn/ARSSIC/.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wieseke:2015:CRI, author = "Nicolas Wieseke and Tom Hartmann and Matthias Bernt and Martin Middendorf", title = "Cophylogenetic Reconciliation with {ILP}", journal = j-TCBB, volume = "12", number = "6", pages = "1227--1235", month = nov, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2430336", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 15 05:57:23 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In this paper, we present an integer linear programming (ILP) approach, called CoRe-ILP, for finding an optimal time consistent cophylogenetic host-parasite reconciliation under the cophylogenetic event model with the events cospeciation, duplication, sorting, host switch, and failure to diverge. Instead of assuming event costs, a simplified model is used, maximizing primarily for cospeciations and secondarily minimizing host switching events. Duplications, sortings, and failure to diverge events are not explicitly scored. Different from existing event based reconciliation methods, CoRe-ILP can use (approximate) phylogenetic branch lengths for filtering possible ancestral host-parasite interactions. Experimentally, it is shown that CoRe-ILP can successfully use branch length information and performs well for biological and simulated data sets. The results of CoRe-ILP are compared with the results of the reconciliation tools Jane 4, Treemap 3b, NOTUNG 2.8 Beta, and Ranger-DTL. 
Algorithm CoRe-ILP is implemented using IBM ILOG CPLEX Optimizer 12.6 and is freely available from http://pacosy.informatik.uni-leipzig.de/core-ilp.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Chen:2015:DIN, author = "Huidong Chen and Jihong Guan and Shuigeng Zhou", title = "{DPNuc}: Identifying Nucleosome Positions Based on the {Dirichlet} Process Mixture Model", journal = j-TCBB, volume = "12", number = "6", pages = "1236--1247", month = nov, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2430350", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 15 05:57:23 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Nucleosomes and the free linker DNA between them assemble the chromatin. Nucleosome positioning plays an important role in gene transcription regulation, DNA replication and repair, alternative splicing, and so on. With the rapid development of ChIP-seq, it is possible to computationally detect the positions of nucleosomes on chromosomes. However, existing methods cannot provide accurate and detailed information about the detected nucleosomes, especially for the nucleosomes with complex configurations where overlaps and noise exist. Meanwhile, they usually require some prior knowledge of nucleosomes as input, such as the size or the number of the unknown nucleosomes, which may significantly influence the detection results. In this paper, we propose a novel approach DPNuc for identifying nucleosome positions based on the Dirichlet process mixture model. In our method, Markov chain Monte Carlo (MCMC) simulations are employed to determine the mixture model with no need of prior knowledge about nucleosomes. 
Compared with three existing methods, our approach can provide more detailed information of the detected nucleosomes and can more reasonably reveal the real configurations of the chromosomes; especially, our approach performs better in the complex overlapping situations. By mapping the detected nucleosomes to a synthetic benchmark nucleosome map and two existing benchmark nucleosome maps, it is shown that our approach achieves a better performance in identifying nucleosome positions and gets a higher $F$-score. Finally, we show that our approach can more reliably detect the size distribution of nucleosomes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Gao:2015:CCE, author = "Nan Gao and Yan Zhang and Bing Feng and Jijun Tang", title = "A Cooperative Co-Evolutionary Genetic Algorithm for Tree Scoring and Ancestral Genome Inference", journal = j-TCBB, volume = "12", number = "6", pages = "1248--1254", month = nov, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2430860", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 15 05:57:23 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Recent advances of technology have made it easy to obtain and compare whole genomes. Rearrangements of genomes through operations such as reversals and transpositions are rare events that enable researchers to reconstruct deep evolutionary history among species. Some of the popular methods need to search a large tree space for the best scored tree, thus it is desirable to have a fast and accurate method that can score a given tree efficiently. 
During the tree scoring procedure, the genomic structures of internal tree nodes are also provided, which provide important information for inferring ancestral genomes and for modeling the evolutionary processes. However, computing tree scores and ancestral genomes are very difficult and a lot of researchers have to rely on heuristic methods which have various disadvantages. In this paper, we describe the first genetic algorithm for tree scoring and ancestor inference, which uses a fitness function considering co-evolution, adopts different initial seeding methods to initialize the first population pool, and utilizes a sorting-based approach to realize evolution. Our extensive experiments show that compared with other existing algorithms, this new method is more accurate and can infer ancestral genomes that are much closer to the true ancestors.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Xie:2015:LFA, author = "Minzhu Xie and Jianxin Wang and Xin Chen", title = "{LGH}: a Fast and Accurate Algorithm for Single Individual Haplotyping Based on a Two-Locus Linkage Graph", journal = j-TCBB, volume = "12", number = "6", pages = "1255--1266", month = nov, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2430352", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 15 05:57:23 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Phased haplotype information is crucial in our complete understanding of differences between individuals at the genetic level. Given a collection of DNA fragments sequenced from a homologous pair of chromosomes, the problem of single individual haplotyping (SIH) aims to reconstruct a pair of haplotypes using a computer algorithm. 
In this paper, we encode the information of aligned DNA fragments into a two-locus linkage graph and approach the SIH problem by vertex labeling of the graph. In order to find a vertex labeling with the minimum sum of weights of incompatible edges, we develop a fast and accurate heuristic algorithm. It starts with detecting error-tolerant components by an adapted breadth-first search. A proper labeling of vertices is then identified for each component, with which sequencing errors are further corrected and edge weights are adjusted accordingly. After contracting each error-tolerant component into a single vertex, the above procedure is iterated on the resulting condensed linkage graph until error-tolerant components are no longer detected. The algorithm finally outputs a haplotype pair based on the vertex labeling. Extensive experiments on simulated and real data show that our algorithm is more accurate and faster than five existing algorithms for single individual haplotyping.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Kumozaki:2015:MLB, author = "Shotaro Kumozaki and Kengo Sato and Yasubumi Sakakibara", title = "A Machine Learning Based Approach to de novo Sequencing of Glycans from Tandem Mass Spectrometry Spectrum", journal = j-TCBB, volume = "12", number = "6", pages = "1267--1274", month = nov, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2430317", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 15 05:57:23 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Recently, glycomics has been actively studied and various technologies for glycomics have been rapidly developed. 
Currently, tandem mass spectrometry (MS/MS) is one of the key experimental tools for identification of structures of oligosaccharides. MS/MS can observe MS/MS peaks of fragmented glycan ions including cross-ring ions resulting from internal cleavages, which provide valuable information to infer glycan structures. Thus, the aim of de novo sequencing of glycans is to find the most probable assignments of observed MS/MS peaks to glycan substructures without databases. However, there are few satisfiable algorithms for glycan de novo sequencing from MS/MS spectra. We present a machine learning based approach to de novo sequencing of glycans from MS/MS spectrum. First, we build a suitable model for the fragmentation of glycans including cross-ring ions, and implement a solver that employs Lagrangian relaxation with a dynamic programming technique. Then, to optimize scores for the algorithm, we introduce a machine learning technique called structured support vector machines that enable us to learn parameters including scores for cross-ring ions from training data, i.e., known glycan mass spectra. Furthermore, we implement additional constraints for core structures of well-known glycan types including N-linked glycans and O-linked glycans. This enables us to predict more accurate glycan structures if the glycan type of given spectra is known. Computational experiments show that our algorithm performs accurate de novo sequencing of glycans. 
The implementation of our algorithm and the datasets are available at http://glyfon.dna.bio.keio.ac.jp/.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Xie:2015:CTC, author = "Xiaojing Xie and Shuigeng Zhou and Jihong Guan", title = "{CoGI}: Towards Compressing Genomes as an Image", journal = j-TCBB, volume = "12", number = "6", pages = "1275--1285", month = nov, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2430331", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 15 05:57:23 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/datacompression.bib; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Genomic science is now facing an explosive increase of data thanks to the fast development of sequencing technology. This situation poses serious challenges to genomic data storage and transferring. It is desirable to compress data to reduce storage and transferring cost, and thus to boost data distribution and utilization efficiency. Up to now, a number of algorithms / tools have been developed for compressing genomic sequences. Unlike the existing algorithms, most of which treat genomes as one-dimensional text strings and compress them based on dictionaries or probability models, this paper proposes a novel approach called CoGI (the abbreviation of Compressing Genomes as an Image) for genome compression, which transforms the genomic sequences to a two-dimensional binary image (or bitmap), then applies a rectangular partition coding algorithm to compress the binary image. CoGI can be used as either a reference-based compressor or a reference-free compressor. For the former, we develop two entropy-based algorithms to select a proper reference genome. Performance evaluation is conducted on various genomes. 
Experimental results show that the reference-based CoGI significantly outperforms two state-of-the-art reference-based genome compressors GReEn and RLZ-opt in both compression ratio and compression efficiency. It also achieves comparable compression ratio but two orders of magnitude higher compression efficiency in comparison with XM---one state-of-the-art reference-free genome compressor. Furthermore, our approach performs much better than Gzip---a general-purpose and widely-used compressor, in both compression speed and compression ratio. So, CoGI can serve as an effective and practical genome compressor. The source code and other related documents of CoGI are available at: http://admis.fudan.edu.cn/projects/cogi.htm.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wang:2015:BMM, author = "Beichen Wang and Xiaodong Chen and Hiroshi Mamitsuka and Shanfeng Zhu", title = "{BMExpert}: Mining {MEDLINE} for Finding Experts in Biomedical Domains Based on Language Model", journal = j-TCBB, volume = "12", number = "6", pages = "1286--1294", month = nov, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2430338", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 15 05:57:23 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "With the rapid development of biomedical sciences, a great number of documents have been published to report new scientific findings and advance the process of knowledge discovery. By the end of 2013, the largest biomedical literature database, MEDLINE, has indexed over 23 million abstracts. It is thus not easy for scientific professionals to find experts on a certain topic in the biomedical domain.
In contrast to the existing services that use some ad hoc approaches, we developed a novel solution to biomedical expert finding, BMExpert, based on the language model. For finding biomedical experts, who are the most relevant to a specific topic query, BMExpert mines MEDLINE documents by considering three important factors: relevance of documents to the query topic, importance of documents, and associations between documents and experts. The performance of BMExpert was evaluated on a benchmark dataset, which was built by collecting the program committee members of ISMB in the past three years (2012--2014) on 14 different topics. Experimental results show that BMExpert outperformed three existing biomedical expert finding services: JANE, GoPubMed, and eTBLAST, with respect to both MAP (mean average precision) and P@50 (Precision). BMExpert is freely accessed at http://datamining-iip.fudan.edu.cn/service/BMExpert/.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Li:2015:RBC, author = "Han Li and Chun Li and Jie Hu and Xiaodan Fan", title = "A Resampling Based Clustering Algorithm for Replicated Gene Expression Data", journal = j-TCBB, volume = "12", number = "6", pages = "1295--1303", month = nov, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2403320", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 15 05:57:23 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In gene expression data analysis, clustering is a fruitful exploratory technique to reveal the underlying molecular mechanism by identifying groups of co-expressed genes. To reduce the noise, usually multiple experimental replicates are performed.
An integrative analysis of the full replicate data, instead of reducing the data to the mean profile, carries the promise of yielding more precise and robust clusters. In this paper, we propose a novel resampling based clustering algorithm for genes with replicated expression measurements. Assuming those replicates are exchangeable, we formulate the problem in the bootstrap framework, and aim to infer the consensus clustering based on the bootstrap samples of replicates. In our approach, we adopt the mixed effect model to accommodate the heterogeneous variances and implement a quasi-MCMC algorithm to conduct statistical inference. Experiments demonstrate that by taking advantage of the full replicate data, our algorithm produces more reliable clusters and has robust performance in diverse scenarios, especially when the data is subject to multiple sources of variance.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Esfahani:2015:OBF, author = "Mohammad Shahrokh Esfahani and Edward R. Dougherty", title = "An Optimization-Based Framework for the Transformation of Incomplete Biological Knowledge into a Probabilistic Structure and Its Application to the Utilization of Gene\slash Protein Signaling Pathways in Discrete Phenotype Classification", journal = j-TCBB, volume = "12", number = "6", pages = "1304--1321", month = nov, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2424407", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 15 05:57:23 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Phenotype classification via genomic data is hampered by small sample sizes that negatively impact classifier design. 
Utilization of prior biological knowledge in conjunction with training data can improve both classifier design and error estimation via the construction of the optimal Bayesian classifier. In the genomic setting, gene/protein signaling pathways provide a key source of biological knowledge. Although these pathways are neither complete, nor regulatory, with no timing associated with them, they are capable of constraining the set of possible models representing the underlying interaction between molecules. The aim of this paper is to provide a framework and the mathematical tools to transform signaling pathways to prior probabilities governing uncertainty classes of feature-label distributions used in classifier design. Structural motifs extracted from the signaling pathways are mapped to a set of constraints on a prior probability on a Multinomial distribution. Being the conjugate prior for the Multinomial distribution, we propose optimization paradigms to estimate the parameters of a Dirichlet distribution in the Bayesian setting. 
The performance of the proposed methods is tested on two widely studied pathways: mammalian cell cycle and a p53 pathway model.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Cheng:2015:CMD, author = "Kin-On Cheng and Paula Wu and Ngai-Fong Law and Wan-Chi Siu", title = "Compression of Multiple {DNA} Sequences Using Intra-Sequence and Inter-Sequence Similarities", journal = j-TCBB, volume = "12", number = "6", pages = "1322--1332", month = nov, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2403370", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 15 05:57:23 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Traditionally, intra-sequence similarity is exploited for compressing a single DNA sequence. Recently, remarkable compression performance of individual DNA sequence from the same population is achieved by encoding its difference with a nearly identical reference sequence. Nevertheless, there is lack of general algorithms that also allow less similar reference sequences. In this work, we extend the intra-sequence to the inter-sequence similarity in that approximate matches of subsequences are found between the DNA sequence and a set of reference sequences. Hence, a set of nearly identical DNA sequences from the same population or a set of partially similar DNA sequences like chromosome sequences and DNA sequences of related species can be compressed together. For practical compressors, the compressed size is usually influenced by the compression order of sequences. Fast search algorithms for the optimal compression order are thus developed for multiple sequences compression. 
Experimental results on artificial and real datasets demonstrate that our proposed multiple sequences compression methods with fast compression order search are able to achieve good compression performance under different levels of similarity in the multiple DNA sequences.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ou-Yang:2015:DPC, author = "Le Ou-Yang and Dao-Qing Dai and Xiao-Fei Zhang", title = "Detecting Protein Complexes from Signed Protein-Protein Interaction Networks", journal = j-TCBB, volume = "12", number = "6", pages = "1333--1344", month = nov, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2401014", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 15 05:57:23 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Identification of protein complexes is fundamental for understanding the cellular functional organization. With the accumulation of physical protein-protein interaction (PPI) data, computational detection of protein complexes from available PPI networks has drawn a lot of attentions. While most of the existing protein complex detection algorithms focus on analyzing the physical protein-protein interaction network, none of them take into account the ``signs'' (i.e., activation-inhibition relationships) of physical interactions. As the ``signs'' of interactions reflect the way proteins communicate, considering the ``signs'' of interactions can not only increase the accuracy of protein complex identification, but also deepen our understanding of the mechanisms of cell functions. In this study, we proposed a novel Signed Graph regularized Nonnegative Matrix Factorization (SGNMF) model to identify protein complexes from signed PPI networks. 
In our experiments, we compared the results collected by our model on signed PPI networks with those predicted by the state-of-the-art complex detection techniques on the original unsigned PPI networks. We observed that considering the ``signs'' of interactions significantly benefits the detection of protein complexes. Furthermore, based on the predicted complexes, we predicted a set of signed complex-complex interactions for each dataset, which provides a novel insight of the higher level organization of the cell. All the experimental results and codes can be downloaded from http://mail.sysu.edu.cn/home/stsddq@mail.sysu.edu.cn/dai/others/SGNMF.zip.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Tomescu:2015:EWD, author = "Alexandru I. Tomescu and Travis Gagie and Alexandru Popa and Romeo Rizzi and Anna Kuosmanen and Veli M{\"a}kinen", title = "Explaining a Weighted {DAG} with Few Paths for Solving Genome-Guided Multi-Assembly", journal = j-TCBB, volume = "12", number = "6", pages = "1345--1354", month = nov, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2418753", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 15 05:57:23 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "RNA-Seq technology offers new high-throughput ways for transcript identification and quantification based on short reads, and has recently attracted great interest. This is achieved by constructing a weighted DAG whose vertices stand for exons, and whose arcs stand for split alignments of the RNA-Seq reads to the exons. The task consists of finding a number of paths, together with their expression levels, which optimally explain the weights of the graph under various fitting functions, such as least sum of squared residuals.
In (Tomescu et al. BMC Bioinformatics, 2013) we studied this genome-guided multi-assembly problem when the number of allowed solution paths was linear in the number of arcs. In this paper, we further refine this problem by asking for a bounded number $k$ of solution paths, which is the setting of most practical interest. We formulate this problem in very broad terms, and show that for many choices of the fitting function it becomes NP-hard. Nevertheless, we identify a natural graph parameter of a DAG $G$, which we call arc-width and denote $ \langle G \rangle $, and give a dynamic programming algorithm running in time $ O(W^k \langle G \rangle^k(\langle G \rangle + k)n)$, where $n$ is the number of vertices and $W$ is the maximum weight of $G$. This implies that the problem is fixed-parameter tractable (FPT) in the parameters $W$, $ \langle G \rangle $, and $k$. We also show that the arc-width of DAGs constructed from simulated and real RNA-Seq reads is small in practice. Finally, we study the approximability of this problem, and, in particular, give a fully polynomial-time approximation scheme (FPTAS) for the case when the fitting function penalizes the maximum ratio between the weights of the arcs and their predicted coverage.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Seniya:2015:SSS, author = "Chandrabhan Seniya and Ajay Yadav and G. J. Khan and Nand K.
Sah", title = "In-silico Studies Show Potent Inhibition of {HIV-1} Reverse Transcriptase Activity by a Herbal Drug", journal = j-TCBB, volume = "12", number = "6", pages = "1355--1364", month = nov, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2415771", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 15 05:57:23 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Acquired immunodeficiency syndrome (AIDS) is a life threatening disease of the human immune system caused by human immunodeficiency virus (HIV). Effective inhibition of reverse transcriptase activity is a prominent, clinically viable approach for the treatment of AIDS. Few non-nucleoside reverse transcriptase inhibitors (NNRTIs) have been approved by the United States Food and Drug Administration (US FDA) as drugs for AIDS. In order to enhance therapeutic options against AIDS we examined novel herbal compounds of 4-thiazolidinone and its derivatives that are known to have remarkable antiviral potency. Our molecular docking and simulation experiments have identified one such herbal molecule known as (5E)-3-(2-aminoethyl)-5-benzylidene-1,3-thiazolidine-2,4-dione that may bind HIV-1RT with high affinity to cause noncompetitive inhibition. Results are also compared with other US FDA approved drugs. Long de novo simulations and docking study suggest that the ligand (5E)-3-(2-aminoethyl)-5-benzylidene-1,3-thiazolidine-2,4-dione (CID: 1656714) has strong binding interactions with Asp113, Asp110, Asp185 and Asp186 amino acids, all of which belong to one or the other catalytic pockets of HIV-1RT. It is expected that these interactions could be critical in the inhibitory activity of the HIV-1RT.
Therefore, this study provides an evidence for consideration of (5E)-3-(2-aminoethyl)-5-benzylidene-1,3-thiazolidine-2,4-dione as a valuable natural molecule in the treatment and prevention of HIV-associated disorders.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Rubiolo:2015:MGR, author = "Mariano Rubiolo and Diego H. Milone and Georgina Stegmayer", title = "Mining Gene Regulatory Networks by Neural Modeling of Expression Time-Series", journal = j-TCBB, volume = "12", number = "6", pages = "1365--1373", month = nov, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2420551", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 15 05:57:23 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Discovering gene regulatory networks from data is one of the most studied topics in recent years. Neural networks can be successfully used to infer an underlying gene network by modeling expression profiles as times series. This work proposes a novel method based on a pool of neural networks for obtaining a gene regulatory network from a gene expression dataset. They are used for modeling each possible interaction between pairs of genes in the dataset, and a set of mining rules is applied to accurately detect the subjacent relations among genes.
The results obtained on artificial and real datasets confirm the method effectiveness for discovering regulatory networks from a proper modeling of the temporal dynamics of gene expression profiles.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Liao:2015:EFR, author = "Bo Liao and Yan Jiang and Wei Liang and Lihong Peng and Li Peng and Damien Hanyurwimfura and Zejun Li and Min Chen", title = "On Efficient Feature Ranking Methods for High-Throughput Data Analysis", journal = j-TCBB, volume = "12", number = "6", pages = "1374--1384", month = nov, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2415790", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 15 05:57:23 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Efficient mining of high-throughput data has become one of the popular themes in the big data era. Existing biology-related feature ranking methods mainly focus on statistical and annotation information. In this study, two efficient feature ranking methods are presented. Multi-target regression and graph embedding are incorporated in an optimization framework, and feature ranking is achieved by introducing structured sparsity norm. Unlike existing methods, the presented methods have two advantages: (1) the feature subset simultaneously account for global margin information as well as locality manifold information. Consequently, both global and locality information are considered. (2) Features are selected by batch rather than individually in the algorithm framework. Thus, the interactions between features are considered and the optimal feature subset can be guaranteed. In addition, this study presents a theoretical justification. 
Empirical experiments demonstrate the effectiveness and efficiency of the two algorithms in comparison with some state-of-the-art feature ranking methods through a set of real-world gene expression data sets.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ma:2015:PPR, author = "Xin Ma and Jing Guo and Ke Xiao and Xiao Sun", title = "{PRBP}: Prediction of {RNA}-Binding Proteins Using a Random Forest Algorithm Combined with an {RNA}-Binding Residue Predictor", journal = j-TCBB, volume = "12", number = "6", pages = "1385--1393", month = nov, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2418773", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 15 05:57:23 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The prediction of RNA-binding proteins is an incredibly challenging problem in computational biology. Although great progress has been made using various machine learning approaches with numerous features, the problem is still far from being solved. In this study, we attempt to predict RNA-binding proteins directly from amino acid sequences. A novel approach, PRBP predicts RNA-binding proteins using the information of predicted RNA-binding residues in conjunction with a random forest based method. For a given protein, we first predict its RNA-binding residues and then judge whether the protein binds RNA or not based on information from that prediction. If the protein cannot be identified by the information associated with its predicted RNA-binding residues, then a novel random forest predictor is used to determine if the query protein is a RNA-binding protein. 
We incorporated features of evolutionary information combined with physicochemical features (EIPP) and amino acid composition feature to establish the random forest predictor. Feature analysis showed that EIPP contributed the most to the prediction of RNA-binding proteins. The results also showed that the information from the RNA-binding residue prediction improved the overall performance of our RNA-binding protein prediction. It is anticipated that the PRBP method will become a useful tool for identifying RNA-binding proteins. A PRBP Web server implementation is freely available at http://www.cbi.seu.edu.cn/PRBP/.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Sriwastava:2015:PPP, author = "Brijesh K. Sriwastava and Subhadip Basu and Ujjwal Maulik", title = "Predicting Protein-Protein Interaction Sites with a Novel Membership Based Fuzzy {SVM} Classifier", journal = j-TCBB, volume = "12", number = "6", pages = "1394--1404", month = nov, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2401018", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 15 05:57:23 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Predicting residues that participate in protein-protein interactions (PPI) helps to identify, which amino acids are located at the interface. In this paper, we show that the performance of the classical support vector machine (SVM) algorithm can further be improved with the use of a custom-designed fuzzy membership function, for the partner-specific PPI interface prediction problem. 
We evaluated the performances of both classical SVM and fuzzy SVM (F-SVM) on the PPI databases of three different model proteomes of Homo sapiens, Escherichia coli and Saccharomyces cerevisiae and calculated the statistical significance of the developed F-SVM over classical SVM algorithm. We also compared our performance with the available state-of-the-art fuzzy methods in this domain and observed significant performance improvements. To predict interaction sites in protein complexes, local composition of amino acids together with their physico-chemical characteristics are used, where the F-SVM based prediction method exploits the membership function for each pair of sequence fragments. The average F-SVM performance (area under ROC curve) on the test samples in 10-fold cross validation experiment are measured as 77.07, 78.39, and 74.91 percent for the aforementioned organisms respectively. Performances on independent test sets are obtained as 72.09, 73.24 and 82.74 percent respectively.
The software is available for free download from http://code.google.com/p/cmater-bioinfo.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ullah:2015:PUA, author = "Ehsan Ullah and Mark Walker and Kyongbum Lee and Soha Hassoun", title = "{PreProPath}: an Uncertainty-Aware Algorithm for Identifying Predictable Profitable Pathways in Biochemical Networks", journal = j-TCBB, volume = "12", number = "6", pages = "1405--1415", month = nov, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2394470", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 15 05:57:23 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Pathway analysis is a powerful approach to enable rational design or redesign of biochemical networks for optimizing metabolic engineering and synthetic biology objectives such as production of desired chemicals or biomolecules from specific nutrients. While experimental methods can be quite successful, computational approaches can enhance discovery and guide experimentation by efficiently exploring very large design spaces. We present a computational algorithm, Predictably Profitable Path (PreProPath), to identify target pathways best suited for engineering modifications. The algorithm utilizes uncertainties about the metabolic network's operating state inherent in the underdetermined linear equations representing the stoichiometric model. Flux Variability Analysis is used to determine the operational flux range. PreProPath identifies a path that is predictable in behavior, exhibiting small flux ranges, and profitable, containing the least restrictive flux-limiting reaction in the network. The algorithm is computationally efficient because it does not require enumeration of pathways.
The results of case studies show that PreProPath can efficiently analyze variances in metabolic states and model uncertainties to suggest pathway engineering strategies that have been previously supported by experimental data.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wong:2015:PIM, author = "Ka-Chun Wong and Chengbin Peng and Yue Li", title = "Probabilistic Inference on Multiple Normalized Signal Profiles from Next Generation Sequencing: Transcription Factor Binding Sites", journal = j-TCBB, volume = "12", number = "6", pages = "1416--1428", month = nov, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2424421", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 15 05:57:23 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "With the prevalence of chromatin immunoprecipitation (ChIP) with sequencing (ChIP-Seq) technology, massive ChIP-Seq data has been accumulated. The ChIP-Seq technology measures the genome-wide occupancy of DNA-binding proteins in vivo. It is well-known that different DNA-binding protein occupancies may result in a gene being regulated in different conditions (e.g. different cell types). To fully understand a gene's function, it is essential to develop probabilistic models on multiple ChIP-Seq profiles for deciphering the gene transcription causalities. In this work, we propose and describe two probabilistic models. Assuming the conditional independence of different DNA-binding proteins' occupancies, the first method (SignalRanker) is developed as an intuitive method for ChIP-Seq genome-wide signal profile inference. Unfortunately, such an assumption may not always hold in some gene regulation cases. 
Thus, we propose and describe another method (FullSignalRanker) which does not make the conditional independence assumption. The proposed methods are compared with other existing methods on ENCODE ChIP-Seq datasets, demonstrating its regression and classification ability. The results suggest that FullSignalRanker is the best-performing method for recovering the signal ranks on the promoter and enhancer regions. In addition, FullSignalRanker is also the best-performing method for peak sequence classification. We envision that SignalRanker and FullSignalRanker will become important in the era of next generation sequencing. FullSignalRanker program is available on the following website: \url{http://www.cs.toronto.edu/~wkc/FullSignalRanker/}.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Raveh:2015:RFM, author = "Alon Raveh and Yoram Zarai and Michael Margaliot and Tamir Tuller", title = "Ribosome Flow Model on a Ring", journal = j-TCBB, volume = "12", number = "6", pages = "1429--1439", month = nov, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2418782", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 15 05:57:23 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The asymmetric simple exclusion process (ASEP) is an important model from statistical physics describing particles that hop randomly from one site to the next along an ordered lattice of sites, but only if the next site is empty. ASEP has been used to model and analyze numerous multiagent systems with local interactions including the flow of ribosomes along the mRNA strand. In ASEP with periodic boundary conditions a particle that hops from the last site returns to the first one.
The mean field approximation of this model is referred to as the ribosome flow model on a ring (RFMR). The RFMR may be used to model both synthetic and endogenous gene expression regimes. We analyze the RFMR using the theory of monotone dynamical systems. We show that it admits a continuum of equilibrium points and that every trajectory converges to an equilibrium point. Furthermore, we show that it entrains to periodic transition rates between the sites. We describe the implications of the analysis results to understanding and engineering cyclic mRNA translation in-vitro and in-vivo.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Sehhati:2015:SGS, author = "Mohammadreza Sehhati and Alireza Mehridehnavi and Hossein Rabbani and Meraj Pourhossein", title = "Stable Gene Signature Selection for Prediction of Breast Cancer Recurrence Using Joint Mutual Information", journal = j-TCBB, volume = "12", number = "6", pages = "1440--1448", month = nov, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2407407", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 15 05:57:23 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In this experiment, a gene selection technique was proposed to select a robust gene signature from microarray data for prediction of breast cancer recurrence. In this regard, a hybrid scoring criterion was designed as linear combinations of the scores that were determined in the mutual information (MI) domain and protein-protein interactions network. Whereas, the MI-based score represents the complementary information between the selected genes for outcome prediction; and the number of connections in the PPI network between the selected genes builds the PPI-based score. 
All genes were scored by using the proposed function in a hybrid forward-backward gene-set selection process to select the optimum biomarker-set from the gene expression microarray data. The accuracy and stability of the finally selected biomarkers were evaluated by using five-fold cross-validation (CV) to classify available data on breast cancer patients into two cohorts of poor and good prognosis. The results showed an appealing improvement in the cross-dataset accuracy in comparison with similar studies whenever we applied a primary signature, which was selected from one dataset, to predict survival in other independent datasets. Moreover, the proposed method demonstrated 58-92 percent overlap between 50-genes signatures, which were selected from seven independent datasets individually.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhang:2015:SAS, author = "Hao Zhang and Xingyuan Wang and Xiaohui Lin", title = "Synchronization of Asynchronous Switched {Boolean} Network", journal = j-TCBB, volume = "12", number = "6", pages = "1449--1456", month = nov, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2404802", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 15 05:57:23 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In this paper, the complete synchronizations for asynchronous switched Boolean network with free Boolean sequence controllers and close-loop controllers are studied. First, the basic asynchronous switched Boolean network model is provided. With the method of semi-tensor product, the Boolean dynamics is translated into linear representation. 
Second, necessary and sufficient conditions for ASBN synchronization with free Boolean sequence control and close-loop control are derived, respectively. Third, some illustrative examples are provided to show the efficiency of the proposed methods.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Sengupta:2015:CAU, author = "T. Sengupta and M. Bhushan and P. P. Wangikar", title = "A Computational Approach Using Ratio Statistics for Identifying Housekeeping Genes from {cDNA} Microarray Data", journal = j-TCBB, volume = "12", number = "6", pages = "1457--1463", month = nov, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2407399", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 15 05:57:23 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We predict housekeeping genes from replicate microarray gene expression data of human lymphoblastoid cells and liver tissue with outliers removed using a scoring scheme, by an algorithm based on statistical hypothesis testing, assuming that such genes are constitutively expressed. A few predicted genes were examined and found to be housekeeping.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Pradeep:2015:NSB, author = "Prachi Pradeep and Craig Struble and Terrence Neumann and Daniel S. Sem and Stephen J. 
Merrill", title = "A Novel Scoring Based Distributed Protein Docking Application to Improve Enrichment", journal = j-TCBB, volume = "12", number = "6", pages = "1464--1469", month = nov, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2401020", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 15 05:57:23 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Molecular docking is a computational technique which predicts the binding energy and the preferred binding mode of a ligand to a protein target. Virtual screening is a tool which uses docking to investigate large chemical libraries to identify ligands that bind favorably to a protein target. We have developed a novel scoring based distributed protein docking application to improve enrichment in virtual screening. The application addresses the issue of time and cost of screening in contrast to conventional systematic parallel virtual screening methods in two ways. Firstly, it automates the process of creating and launching multiple independent dockings on a high performance computing cluster. Secondly, it uses a Na{\"\i}ve Bayes scoring function to calculate binding energy of un-docked ligands to identify and preferentially dock (Autodock predicted) better binders. The application was tested on four proteins using a library of 10,573 ligands. In all the experiments, (i). 200 of the 1,000 best binders are identified after docking only $ \sim 14 $ percent of the chemical library, (ii). 9 or 10 best-binders are identified after docking only $ \sim 19 $ percent of the chemical library, and (iii). no significant enrichment is observed after docking $ \sim 70 $ percent of the chemical library. 
The results show significant increase in enrichment of potential drug leads in early rounds of virtual screening.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Dellen:2015:GSR, author = "Babette Dellen and Hanno Scharr and Carme Torras", title = "Growth Signatures of Rosette Plants from Time-Lapse Video", journal = j-TCBB, volume = "12", number = "6", pages = "1470--1478", month = nov, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2404810", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 15 05:57:23 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Plant growth is a dynamic process, and the precise course of events during early plant development is of major interest for plant research. In this work, we investigate the growth of rosette plants by processing time-lapse videos of growing plants, where we use Nicotiana tabacum (tobacco) as a model plant. In each frame of the video sequences, potential leaves are detected using a leaf-shape model. These detections are prone to errors due to the complex shape of plants and their changing appearance in the image, depending on leaf movement, leaf growth, and illumination conditions. To cope with this problem, we employ a novel graph-based tracking algorithm which can bridge gaps in the sequence by linking leaf detections across a range of neighboring frames. We use the overlap of fitted leaf models as a pairwise similarity measure, and forbid graph edges that would link leaf detections within a single frame. 
We tested the method on a set of tobacco-plant growth sequences, and could track the first leaves of the plant, including partially or temporarily occluded ones, along complete sequences, demonstrating the applicability of the method to automatic plant growth analysis. All seedlings displayed approximately the same growth behavior, and a characteristic growth signature was found.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wigren:2015:MOI, author = "Torbj{\"o}rn Wigren", title = "Model Order and Identifiability of Non-Linear Biological Systems in Stable Oscillation", journal = j-TCBB, volume = "12", number = "6", pages = "1479--1484", month = nov, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2404799", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 15 05:57:23 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The paper presents a theoretical result that clarifies when it is at all possible to determine the nonlinear dynamic equations of a biological system in stable oscillation, from measured data. As it turns out the minimal order needed for this is dependent on the minimal dimension in which the stable orbit of the system does not intersect itself. This is illustrated with a simulated fourth order Hodgkin--Huxley spiking neuron model, which is identified using a non-linear second order differential equation model. The simulated result illustrates that the underlying higher order model of the spiking neuron cannot be uniquely determined given only the periodic measured data.
The result of the paper is of general validity when the dynamics of biological systems in stable oscillation is identified, and illustrates the need to carefully address non-linear identifiability aspects when validating models based on periodic data.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Huang:2016:GES, author = "De-Shuang Huang and Vitoantonio Bevilacqua and M. Michael Gromiha", title = "Guest Editorial for Special Section on the {10th International Conference on Intelligent Computing (ICIC)}", journal = j-TCBB, volume = "13", number = "1", pages = "1--3", month = jan, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2491058", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 12:53:42 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Han:2016:GII, author = "Kyungsook Han and Jeonghoon Lee", title = "{GeneNetFinder2}: Improved Inference of Dynamic Gene Regulatory Relations with Multiple Regulators", journal = j-TCBB, volume = "13", number = "1", pages = "4--11", month = jan, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2450728", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 12:53:42 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A gene involved in complex regulatory interactions may have multiple regulators since gene expression in such interactions is often controlled by more than one gene. 
Another thing that makes gene regulatory interactions complicated is that regulatory interactions are not static, but change over time during the cell cycle. Most research so far has focused on identifying gene regulatory relations between individual genes in a particular stage of the cell cycle. In this study we developed a method for identifying dynamic gene regulations of several types from the time-series gene expression data. The method can find gene regulations with multiple regulators that work in combination or individually as well as those with single regulators. The method has been implemented as the second version of GeneNetFinder (hereafter called GeneNetFinder2) and tested on several gene expression datasets. Experimental results with gene expression data revealed the existence of genes that are not regulated by individual genes but rather by a combination of several genes. Such gene regulatory relations cannot be found by conventional methods. Our method finds such regulatory relations as well as those with multiple, independent regulators or single regulators, and represents gene regulatory relations as a dynamic network in which different gene regulatory relations are shown in different stages of the cell cycle. 
GeneNetFinder2 is available at http://bclab.inha.ac.kr/GeneNetFinder and will be useful for modeling dynamic gene regulations with multiple regulators.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Bonilla-Huerta:2016:HFU, author = "Edmundo Bonilla-Huerta and Alberto Hernandez-Montiel and Roberto Morales-Caporal and Marco Arjona Lopez", title = "Hybrid Framework Using Multiple-Filters and an Embedded Approach for an Efficient Selection and Classification of Microarray Data", journal = j-TCBB, volume = "13", number = "1", pages = "12--26", month = jan, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2474384", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 12:53:42 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A hybrid framework composed of two stages for gene selection and classification of DNA microarray data is proposed. At the first stage, five traditional statistical methods are combined for preliminary gene selection (Multiple Fusion Filter). Then, different relevant gene subsets are selected by using an embedded Genetic Algorithm (GA), Tabu Search (TS), and Support Vector Machine (SVM). A gene subset, consisting of the most relevant genes, is obtained from this process, by analyzing the frequency of each gene in the different gene subsets. Finally, the most frequent genes are evaluated by the embedded approach to obtain a final relevant small gene subset with high performance. The proposed method is tested in four DNA microarray datasets.
From simulation study, it is observed that the proposed approach works better than other methods reported in the literature.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Deng:2016:PHG, author = "Su-Ping Deng and Lin Zhu and De-Shuang Huang", title = "Predicting Hub Genes Associated with Cervical Cancer through Gene Co-Expression Networks", journal = j-TCBB, volume = "13", number = "1", pages = "27--35", month = jan, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2476790", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 12:53:42 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Cervical cancer is the third most common malignancy in women worldwide. It remains a leading cause of cancer-related death for women in developing countries. In order to contribute to the treatment of the cervical cancer, in our work, we try to find a few key genes resulting in the cervical cancer. Employing functions of several bioinformatics tools, we selected 143 differentially expressed genes (DEGs) associated with the cervical cancer. The results of bioinformatics analysis show that these DEGs play important roles in the development of cervical cancer. Through comparing two differential co-expression networks (DCNs) at two different states, we found a common sub-network and two differential sub-networks as well as some hub genes in three sub-networks. Moreover, some of the hub genes have been reported to be related to the cervical cancer. Those hub genes were analyzed from Gene Ontology function enrichment, pathway enrichment and protein binding three aspects. 
The results can help us understand the development of the cervical cancer and guide further experiments about the cervical cancer.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Qu:2016:PSL, author = "Xumi Qu and Dong Wang and Yuehui Chen and Shanping Qiao and Qing Zhao", title = "Predicting the Subcellular Localization of Proteins with Multiple Sites Based on Multiple Features Fusion", journal = j-TCBB, volume = "13", number = "1", pages = "36--42", month = jan, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2485207", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 12:53:42 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Protein sub-cellular localization prediction has attracted much attention in recent years because of its importance for protein function studying and targeted drug discovery, and that makes it to be an important research field in bioinformatics. Traditional experimental methods which ascertain the protein sub-cellular locations are costly and time consuming. In the last two decades, machine learning methods got increasing development, and a large number of machine learning based protein sub-cellular location predictors have been developed. However, most of such predictors can only predict proteins in only one subcellular location. With the development of biology techniques, more and more proteins which have two or even more sub-cellular locations have been found. It is much more significant to study such proteins because they have extremely useful implication for both basic biology and bioinformatics research. In order to improve the accuracy of prediction, much more feature information which can represent the protein sequence should be extracted. 
In this paper, several feature extraction methods were fused together to extract the feature information, then the multi-label k nearest neighbors (ML-KNN) algorithm was used to predict protein sub-cellular locations. The best overall accuracies we got for dataset s1 in constructing Gpos-mPLoc is 66.7304 and 59.9206 percent for dataset s2 in constructing Virus-mPLoc.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Hsieh:2016:FCM, author = "Sun-Yuan Hsieh and Yu-Chun Chou", title = "A Faster {cDNA} Microarray Gene Expression Data Classifier for Diagnosing Diseases", journal = j-TCBB, volume = "13", number = "1", pages = "43--54", month = jan, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2474389", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 12:53:42 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Profiling cancer molecules has several advantages; however, using microarray technology in routine clinical diagnostics is challenging for physicians. The classification of microarray data has two main limitations: (1) the data set is unreliable for building classifiers; and (2) the classifiers exhibit poor performance. Current microarray classification algorithms typically yield a high rate of false-positives cases, which is unacceptable in diagnostic applications. Numerous algorithms have been developed to detect false-positive cases; however, they require a considerable computation time. To address this problem, this study enhanced a previously proposed gene expression graph (GEG)-based classifier to shorten the computation time. The modified classifier filters genes by using an edge weight to determine their significance, thereby facilitating accurate comparison and classification.
This study experimentally compared the proposed classifier with a GEG-based classifier by using real data and benchmark tests. The results show that the proposed classifier is faster at detecting false-positives.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhu:2016:CPE, author = "Lin Zhu and Wei-Li Guo and Su-Ping Deng and De-Shuang Huang", title = "{ChIP-PIT}: Enhancing the Analysis of {ChIP-Seq} Data Using Convex-Relaxed Pair-Wise Interaction Tensor Decomposition", journal = j-TCBB, volume = "13", number = "1", pages = "55--63", month = jan, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2465893", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 12:53:42 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In recent years, thanks to the efforts of individual scientists and research consortiums, a huge amount of chromatin immunoprecipitation followed by high-throughput sequencing (ChIP-seq) experimental data have been accumulated. Instead of investigating them independently, several recent studies have convincingly demonstrated that a wealth of scientific insights can be gained by integrative analysis of these ChIP-seq data. However, when used for the purpose of integrative analysis, a serious drawback of current ChIP-seq technique is that it is still expensive and time-consuming to generate ChIP-seq datasets of high standard. Most researchers are therefore unable to obtain complete ChIP-seq data for several TFs in a wide variety of cell lines, which considerably limits the understanding of transcriptional regulation pattern. In this paper, we propose a novel method called ChIP-PIT to overcome the aforementioned limitation. 
In ChIP-PIT, ChIP-seq data corresponding to a diverse collection of cell types, TFs and genes are fused together using the three-mode pair-wise interaction tensor (PIT) model, and the prediction of unperformed ChIP-seq experimental results is formulated as a tensor completion problem. Computationally, we propose efficient first-order method based on extensions of coordinate descent method to learn the optimal solution of ChIP-PIT, which makes it particularly suitable for the analysis of massive scale ChIP-seq data. Experimental evaluation the ENCODE data illustrate the usefulness of the proposed model.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Hsiao:2016:PGI, author = "Yu-Ting Hsiao and Wei-Po Lee and Wei Yang and Stefan Muller and Christoph Flamm and Ivo Hofacker and Philipp Kugler", title = "Practical Guidelines for Incorporating Knowledge-Based and Data-Driven Strategies into the Inference of Gene Regulatory Networks", journal = j-TCBB, volume = "13", number = "1", pages = "64--75", month = jan, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2465954", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 12:53:42 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Modeling gene regulatory networks (GRNs) is essential for conceptualizing how genes are expressed and how they influence each other. Typically, a reverse engineering approach is employed; this strategy is effective in reproducing possible fitting models of GRNs. To use this strategy, however, two daunting tasks must be undertaken: one task is to optimize the accuracy of inferred network behaviors; and the other task is to designate valid biological topologies for target networks. 
Although existing studies have addressed these two tasks for years, few of the studies can satisfy both of the requirements simultaneously. To address these difficulties, we propose an integrative modeling framework that combines knowledge-based and data-driven input sources to construct biological topologies with their corresponding network behaviors. To validate the proposed approach, a real dataset collected from the cell cycle of the yeast S. cerevisiae is used. The results show that the proposed framework can successfully infer solutions that meet the requirements of both the network behaviors and biological structures. Therefore, the outcomes are exploitable for future in vivo experimental design.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Fang:2016:IGS, author = "Yi Fang and Mengtian Sun and Guoxian Dai and Karthik Ramani", title = "The Intrinsic Geometric Structure of Protein-Protein Interaction Networks for Protein Interaction Prediction", journal = j-TCBB, volume = "13", number = "1", pages = "76--85", month = jan, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2456876", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 12:53:42 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Recent developments in high-throughput technologies for measuring protein-protein interaction (PPI) have profoundly advanced our ability to systematically infer protein function and regulation. However, inherently high false positive and false negative rates in measurement have posed great challenges in computational approaches for the prediction of PPI. A good PPI predictor should be (1) resistant to high rate of missing and spurious PPIs, and (2) robust against incompleteness of observed PPI networks. 
To predict PPI in a network, we developed an intrinsic geometry structure (IGS) for network, which exploits the intrinsic and hidden relationship among proteins in network through a heat diffusion process. In this process, all explicit PPIs participate simultaneously to glue local infinitesimal and noisy experimental interaction data to generate a global macroscopic descriptions about relationships among proteins. The revealed implicit relationship can be interpreted as the probability of two proteins interacting with each other. The revealed relationship is intrinsic and robust against individual, local and explicit protein interactions in the original network. We apply our approach to publicly available PPI network data for the evaluation of the performance of PPI prediction. Experimental results indicate that, under different levels of the missing and spurious PPIs, IGS is able to robustly exploit the intrinsic and hidden relationship for PPI prediction with a higher sensitivity and specificity compared to that of recently proposed methods.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Cheng:2016:NTL, author = "Yu-Huei Cheng", title = "A Novel Teaching-Learning-Based Optimization for Improved Mutagenic Primer Design in Mismatch {PCR-RFLP SNP} Genotyping", journal = j-TCBB, volume = "13", number = "1", pages = "86--98", month = jan, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2430354", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 12:53:42 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Many single nucleotide polymorphisms (SNPs) for complex genetic diseases are genotyped by polymerase chain reaction-restriction fragment length polymorphism (PCR-RFLP) in small-scale basic research studies. 
It is an essential work to design feasible PCR-RFLP primer pair and find out available restriction enzymes to recognize the target SNP for PCR experiments. However, many SNPs are incapable of performing PCR-RFLP makes SNP genotyping become unpractical. A genetic algorithm (GA) had been proposed for designing mutagenic primer and get available restriction enzymes, but it gives an unrefined solution in mutagenic primers. In order to improve the mutagenic primer design, we propose TLBOMPD (TLBO-based Mutagenic Primer Design) a novel computational intelligence-based method that uses the notion of ``teaching and learning'' to search for more feasible mutagenic primers and provide the latest available restriction enzymes. The original Wallace's formula for the calculation of melting temperature is maintained, and more accurate calculation formulas of GC-based melting temperature and thermodynamic melting temperature are introduced into the proposed method. Mutagenic matrix is also reserved to increase the efficiency of judging a hypothetical mutagenic primer if involve available restriction enzymes for recognizing the target SNP. Furthermore, the core of SNP-RFLPing version 2 is used to enhance the mining work for restriction enzymes based on the latest REBASE. Twenty-five SNPs with mismatch PCR-RFLP screened from 288 SNPs in human SLC6A4 gene are used to appraise the TLBOMPD. Also, the computational results are compared with those of the GAMPD. In the future, the usage of the mutagenic primers in the wet lab needs to been validated carefully to increase the reliability of the method. 
The TLBOMPD is implemented in JAVA and it is freely available at http://tlbompd.googlecode.com/.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Roy:2016:DMB, author = "Indranil Roy and Srinivas Aluru", title = "Discovering Motifs in Biological Sequences Using the {Micron} Automata Processor", journal = j-TCBB, volume = "13", number = "1", pages = "99--111", month = jan, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2430313", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 12:53:42 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Finding approximately conserved sequences, called motifs, across multiple DNA or protein sequences is an important problem in computational biology. In this paper, we consider the $ (l, d) $ motif search problem of identifying one or more motifs of length $l$ present in at least $q$ of the $n$ given sequences, with each occurrence differing from the motif in at most $d$ substitutions. The problem is known to be NP-complete, and the largest solved instance reported to date is $ (26, 11)$ . We propose a novel algorithm for the $ (l, d)$ motif search problem using streaming execution over a large set of non-deterministic finite automata (NFA). This solution is designed to take advantage of the micron automata processor, a new technology close to deployment that can simultaneously execute multiple NFA in parallel. We demonstrate the capability for solving much larger instances of the $ (l, d)$ motif search problem using the resources available within a single automata processor board, by estimating run-times for problem instances $ (39, 18)$ and $ (40, 17)$ . 
The paper serves as a useful guide to solving problems using this new accelerator technology.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Bahlouli:2016:FBP, author = "S. Bahlouli and A. Mokaddem and F. Hamdache and H. Riane and M. Kameche", title = "Fractal Behavior of the Pancreatic $ \beta $-Cell Near the Percolation Threshold: Effect of the {K$_{\rm ATP}$} Channel On the Electrical Response", journal = j-TCBB, volume = "13", number = "1", pages = "112--121", month = jan, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2415797", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 12:53:42 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The molecular system built with true chemical bonds or strong molecular interaction can be described using conceptual mathematical tools. Modeling of the natural generated ionic currents on the human pancreatic $ \beta $ -cell activity had been already studied using complicated analytical models. In our present contribution, we prove the same using our simple electrical model. The ionic currents are associated with different proteins membrane channels (K-Ca, K$_v$, K$_{ATP}$, Ca$_v$ -L) and Na/Ca Exchanger (NCX). The proteins are Ohmic conductors and are modeled by conductance randomly distributed. Switches are placed in series with conductances in order to highlight the channel activity. However, the K$_{ATP}$ channel activity is stimulated by glucose, and the NCX's conductance change according to the intracellular calcium concentration. The percolation threshold of the system is calculated by the fractal nature of the infinite cluster using the Tarjan's depth-first-search algorithm. 
It is shown that the behavior of the internal concentration of Ca$^{2+}$ and the membrane potential are modulated by glucose. The results confirm that the inhibition of K$_{ATP}$ channels depolarizes the membrane and increases the influx of [Ca$^{2+}$]$_i$ through NCX and Ca$_v$ -L channel for high glucose concentrations.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ullah:2016:GAC, author = "Ehsan Ullah and Shuchin Aeron and Soha Hassoun", title = "{gEFM}: an Algorithm for Computing Elementary Flux Modes Using Graph Traversal", journal = j-TCBB, volume = "13", number = "1", pages = "122--134", month = jan, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2430344", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 12:53:42 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Computational methods to engineer cellular metabolism promise to play a critical role in producing pharmaceutical, repairing defective genes, destroying cancer cells, and generating biofuels. Elementary Flux Mode (EFM) analysis is one such powerful technique that has elucidated cell growth and regulation, predicted product yield, and analyzed network robustness. EFM analysis, however, is a computationally daunting task because it requires the enumeration of all independent and stoichiometrically balanced pathways within a cellular network. We present in this paper an EFM enumeration algorithm, termed graphical EFM or gEFM. The algorithm is based on graph traversal, an approach previously assumed unsuitable for enumerating EFMs. The approach is derived from a pathway synthesis method proposed by Mavrovouniotis et al. The algorithm is described and proved correct. 
We apply gEFM to several networks and report runtimes in comparison with other EFM computation tools. We show how gEFM benefits from network compression. Like other EFM computational techniques, gEFM is sensitive to constraint ordering; however, we are able to demonstrate that knowledge of the underlying network structure leads to better constraint ordering. gEFM is shown to be competitive with state-of-the-art EFM computational techniques for several networks, but less so for networks with a larger number of EFMs.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhang:2016:GAS, author = "Xian Zhang and Ligang Wu and Jiahua Zou", title = "Globally Asymptotic Stability Analysis for Genetic Regulatory Networks with Mixed Delays: an {$M$}-Matrix-Based Approach", journal = j-TCBB, volume = "13", number = "1", pages = "135--147", month = jan, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2424432", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 12:53:42 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This paper deals with the problem of globally asymptotic stability for nonnegative equilibrium points of genetic regulatory networks (GRNs) with mixed delays (i.e., time-varying discrete delays and constant distributed delays). Up to now, all existing stability criteria for equilibrium points of the kind of considered GRNs are in the form of the linear matrix inequalities (LMIs). In this paper, the Brouwer's fixed point theorem is employed to obtain sufficient conditions such that the kind of GRNs under consideration here has at least one nonnegative equilibrium point. 
Then, by using the nonsingular M-matrix theory and the functional differential equation theory, M-matrix-based sufficient conditions are proposed to guarantee that the kind of GRNs under consideration here has a unique nonnegative equilibrium point which is globally asymptotically stable. The M-matrix-based sufficient conditions derived here are to check whether a constant matrix is a nonsingular M-matrix, which can be easily verified, as there are many equivalent statements on the nonsingular M-matrices. So, in terms of computational complexity, the M-matrix-based stability criteria established in this paper are superior to the LMI-based ones in literature. To illustrate the effectiveness of the approach proposed in this paper, several numerical examples and their simulations are given.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ranganarayanan:2016:IGB, author = "Preethi Ranganarayanan and Narmadha Thanigesan and Vivek Ananth and Valadi K. Jayaraman and Vigneshwar Ramakrishnan", title = "Identification of Glucose-Binding Pockets in Human Serum Albumin Using Support Vector Machine and Molecular Dynamics Simulations", journal = j-TCBB, volume = "13", number = "1", pages = "148--157", month = jan, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2415806", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 12:53:42 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Human Serum Albumin (HSA) has been suggested to be an alternate biomarker to the existing Hemoglobin-A1c (HbA1c) marker for glycemic monitoring. Development and usage of HSA as an alternate biomarker requires the identification of glycation sites, or equivalently, glucose-binding pockets. 
In this work, we combine molecular dynamics simulations of HSA and the state-of-art machine learning method Support Vector Machine (SVM) to predict glucose-binding pockets in HSA. SVM uses the three dimensional arrangement of atoms and their chemical properties to predict glucose-binding ability of a pocket. Feature selection reveals that the arrangement of atoms and their chemical properties within the first 4{\AA} from the centroid of the pocket play an important role in the binding of glucose. With a 10-fold cross validation accuracy of 84 percent, our SVM model reveals seven new potential glucose-binding sites in HSA of which two are exposed only during the dynamics of HSA. The predictions are further corroborated using docking studies. These findings can complement studies directed towards the development of HSA as an alternate biomarker for glycemic monitoring.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Rivera-Borroto:2016:RAM, author = "Oscar Miguel Rivera-Borroto and Jose Manuel {Garcia-de la Vega} and Yovani Marrero-Ponce and Ricardo Grau", title = "Relational Agreement Measures for Similarity Searching of Cheminformatic Data Sets", journal = j-TCBB, volume = "13", number = "1", pages = "158--167", month = jan, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2424435", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 12:53:42 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Research on similarity searching of cheminformatic data sets has been focused on similarity measures using fingerprints. However, nominal scales are the least informative of all metric scales, increasing the tied similarity scores, and decreasing the effectivity of the retrieval engines. 
Tanimoto's coefficient has been claimed to be the most prominent measure for this task. Nevertheless, this field is far from being exhausted since the computer science no free lunch theorem predicts that ``no similarity measure has overall superiority over the population of data sets''. We introduce 12 relational agreement (RA) coefficients for seven metric scales, which are integrated within a group fusion-based similarity searching algorithm. These similarity measures are compared to a reference panel of 21 proximity quantifiers over 17 benchmark data sets (MUV), by using informative descriptors, a feature selection stage, a suitable performance metric, and powerful comparison tests. In this stage, RA coefficients perform favourably with respect to the state-of-the-art proximity measures. Afterward, the RA-based method outperform another four nearest neighbor searching algorithms over the same data domains. In a third validation stage, RA measures are successfully applied to the virtual screening of the NCI data set. 
Finally, we discuss a possible molecular interpretation for these similarity variants.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Anonymous:2016:RL, author = "Anonymous", title = "2015 reviewers list", journal = j-TCBB, volume = "13", number = "1", pages = "168--171", month = jan, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2522778", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 12:53:42 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The publication offers a note of thanks and lists its reviewers.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Title:2016:IIA, author = "Anonymous", title = "2015 Index {IEEE\slash ACM Transactions on Computational Biology and Bioinformatics} Vol. 12", journal = j-TCBB, volume = "13", number = "1", pages = "172--195", month = jan, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2523430", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 12:53:42 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This index covers all technical items --- papers, correspondence, reviews, etc. --- that appeared in this periodical during the year, and items from previous years that were commented upon or corrected in this year. Departments and other items may also be covered if they have been judged to have archival value. The Author Index contains the primary entry for each item, listed under the first author's name. 
The primary entry includes the co-authors' names, the title of the paper or other item, and its location, specified by the publication abbreviation, year, month, and inclusive pagination. The Subject Index contains entries describing the item under all appropriate subject headings, plus the first author's name, the publication abbreviation, month, and year, and inclusive pages. Note that the item title is found only under the primary entry in the Author Index.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Guzzi:2016:GES, author = "Pietro H. Guzzi and Marco Mina", title = "Guest Editorial for Special Section on Semantic-Based Approaches for Analysis of Biological Data", journal = j-TCBB, volume = "13", number = "2", pages = "196--196", month = mar, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2535578", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 12:53:43 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Agapito:2016:ECO, author = "Giuseppe Agapito and Marianna Milano and Pietro Hiram Guzzi and Mario Cannataro", title = "Extracting Cross-Ontology Weighted Association Rules from Gene Ontology Annotations", journal = j-TCBB, volume = "13", number = "2", pages = "197--208", month = mar, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2462348", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 12:53:43 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Gene Ontology (GO) is a structured repository of 
concepts (GO Terms) that are associated to one or more gene products through a process referred to as annotation. The analysis of annotated data is an important opportunity for bioinformatics. There are different approaches of analysis, among those, the use of association rules (AR) which provides useful knowledge, discovering biologically relevant associations between terms of GO, not previously known. In a previous work, we introduced GO-WAR (Gene Ontology-based Weighted Association Rules), a methodology for extracting weighted association rules from ontology-based annotated datasets. We here adapt the GO-WAR algorithm to mine cross-ontology association rules, i.e., rules that involve GO terms present in the three sub-ontologies of GO. We conduct a deep performance evaluation of GO-WAR by mining publicly available GO annotated datasets, showing how GO-WAR outperforms current state of the art approaches.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Masseroli:2016:IQG, author = "Marco Masseroli and Arif Canakoglu and Stefano Ceri", title = "Integration and Querying of Genomic and Proteomic Semantic Annotations for Biomedical Knowledge Extraction", journal = j-TCBB, volume = "13", number = "2", pages = "209--219", month = mar, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2453944", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 12:53:43 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Understanding complex biological phenomena involves answering complex biomedical questions on multiple biomolecular information simultaneously, which are expressed through multiple genomic and proteomic semantic annotations scattered in many distributed and heterogeneous data sources; such heterogeneity and 
dispersion hamper the biologists' ability of asking global queries and performing global evaluations. To overcome this problem, we developed a software architecture to create and maintain a Genomic and Proteomic Knowledge Base (GPKB), which integrates several of the most relevant sources of such dispersed information (including Entrez Gene, UniProt, IntAct, Expasy Enzyme, GO, GOA, BioCyc, KEGG, Reactome, and OMIM). Our solution is general, as it uses a flexible, modular, and multilevel global data schema based on abstraction and generalization of integrated data features, and a set of automatic procedures for easing data integration and maintenance, also when the integrated data sources evolve in data content, structure, and number. These procedures also assure consistency, quality, and provenance tracking of all integrated data, and perform the semantic closure of the hierarchical relationships of the integrated biomedical ontologies. At http://www.bioinformatics.deib.polimi.it/GPKB/, a Web interface allows graphical easy composition of queries, although complex, on the knowledge base, supporting also semantic query expansion and comprehensive explorative search of the integrated data to better sustain biomedical knowledge extraction.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Yu:2016:PPF, author = "Guoxian Yu and Guangyuan Fu and Jun Wang and Hailong Zhu", title = "Predicting Protein Function via Semantic Integration of Multiple Networks", journal = j-TCBB, volume = "13", number = "2", pages = "220--232", month = mar, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2459713", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 12:53:43 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Determining the 
biological functions of proteins is one of the key challenges in the post-genomic era. The rapidly accumulated large volumes of proteomic and genomic data drives to develop computational models for automatically predicting protein function in large scale. Recent approaches focus on integrating multiple heterogeneous data sources and they often get better results than methods that use single data source alone. In this paper, we investigate how to integrate multiple biological data sources with the biological knowledge, i.e., Gene Ontology (GO), for protein function prediction. We propose a method, called SimNet, to Semantically integrate multiple functional association Networks derived from heterogeneous data sources. SimNet firstly utilizes GO annotations of proteins to capture the semantic similarity between proteins and introduces a semantic kernel based on the similarity. Next, SimNet constructs a composite network, obtained as a weighted summation of individual networks, and aligns the network with the kernel to get the weights assigned to individual networks. Then, it applies a network-based classifier on the composite network to predict protein function. Experiment results on heterogeneous proteomic data sources of Yeast, Human, Mouse, and Fly show that, SimNet not only achieves better (or comparable) results than other related competitive approaches, but also takes much less time. The Matlab codes of SimNet are available at https://sites.google.com/site/guoxian85/simnet.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Fernandez:2016:OBS, author = "Javier D. 
Fernandez and Maurizio Lenzerini and Marco Masseroli and Francesco Venco and Stefano Ceri", title = "Ontology-Based Search of Genomic Metadata", journal = j-TCBB, volume = "13", number = "2", pages = "233--247", month = mar, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2495179", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 12:53:43 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The Encyclopedia of DNA Elements (ENCODE) is a huge and still expanding public repository of more than 4,000 experiments and 25,000 data files, assembled by a large international consortium since 2007; unknown biological knowledge can be extracted from these huge and largely unexplored data, leading to data-driven genomic, transcriptomic, and epigenomic discoveries. Yet, search of relevant datasets for knowledge discovery is limitedly supported: metadata describing ENCODE datasets are quite simple and incomplete, and not described by a coherent underlying ontology. Here, we show how to overcome this limitation, by adopting an ENCODE metadata searching approach which uses high-quality ontological knowledge and state-of-the-art indexing technologies. Specifically, we developed S.O.S. GeM (http://www.bioinformatics.deib.polimi.it/SOSGeM/), a system supporting effective semantic search and retrieval of ENCODE datasets. First, we constructed a Semantic Knowledge Base by starting with concepts extracted from ENCODE metadata, matched to and expanded on biomedical ontologies integrated in the well-established Unified Medical Language System. We prove that this inference method is sound and complete. Then, we leveraged the Semantic Knowledge Base to semantically search ENCODE data from arbitrary biologists' queries. This allows correctly finding more datasets than those extracted by a purely syntactic search, as supported by the other available systems. 
We empirically show the relevance of found datasets to the biologists' queries.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Chicco:2016:OBP, author = "Davide Chicco and Marco Masseroli", title = "Ontology-Based Prediction and Prioritization of Gene Functional Annotations", journal = j-TCBB, volume = "13", number = "2", pages = "248--260", month = mar, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2459694", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 12:53:43 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Genes and their protein products are essential molecular units of a living organism. The knowledge of their functions is key for the understanding of physiological and pathological biological processes, as well as in the development of new drugs and therapies. The association of a gene or protein with its functions, described by controlled terms of biomolecular terminologies or ontologies, is named gene functional annotation. Very many and valuable gene annotations expressed through terminologies and ontologies are available. Nevertheless, they might include some erroneous information, since only a subset of annotations are reviewed by curators. Furthermore, they are incomplete by definition, given the rapidly evolving pace of biomolecular knowledge. In this scenario, computational methods that are able to quicken the annotation curation process and reliably suggest new annotations are very important. 
Here, we first propose a computational pipeline that uses different semantic and machine learning methods to predict novel ontology-based gene functional annotations; then, we introduce a new semantic prioritization rule to categorize the predicted annotations by their likelihood of being correct. Our tests and validations proved the effectiveness of our pipeline and prioritization of predicted annotations, by selecting as most likely manifold predicted annotations that were later confirmed.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wong:2016:CSD, author = "Ka-Chun Wong and Yue Li and Chengbin Peng and Hau-San Wong", title = "A Comparison Study for {DNA} Motif Modeling on Protein Binding Microarray", journal = j-TCBB, volume = "13", number = "2", pages = "261--271", month = mar, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2443782", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 12:53:43 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Transcription factor binding sites (TFBSs) are relatively short (5-15 bp) and degenerate. Identifying them is a computationally challenging task. In particular, protein binding microarray (PBM) is a high-throughput platform that can measure the DNA binding preference of a protein in a comprehensive and unbiased manner; for instance, a typical PBM experiment can measure binding signal intensities of a protein to all possible DNA k-mers ($ k = 8 \sim 10 $). Since proteins can often bind to DNA with different binding intensities, one of the major challenges is to build TFBS (also known as DNA motif) models which can fully capture the quantitative binding affinity data.
To learn DNA motif models from the non-convex objective function landscape, several optimization methods are compared and applied to the PBM motif model building problem. In particular, representative methods from different optimization paradigms have been chosen for modeling performance comparison on hundreds of PBM datasets. The results suggest that the multimodal optimization methods are very effective for capturing the binding preference information from PBM data. In particular, we observe a general performance improvement if choosing di-nucleotide modeling over mono-nucleotide modeling. In addition, the models learned by the best-performing method are applied to two independent applications: PBM probe rotation testing and ChIP-Seq peak sequence prediction, demonstrating its biological applicability.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Nguyen:2016:LDA, author = "An Nguyen and Adam Prugel-Bennett and Srinandan Dasmahapatra", title = "A Low Dimensional Approximation For Competence In \bioname{Bacillus subtilis}", journal = j-TCBB, volume = "13", number = "2", pages = "272--280", month = mar, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2440275", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 12:53:43 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The behaviour of a high dimensional stochastic system described by a chemical master equation (CME) depends on many parameters, rendering explicit simulation an inefficient method for exploring the properties of such models. Capturing their behaviour by low-dimensional models makes analysis of system behaviour tractable.
In this paper, we present low dimensional models for the noise-induced excitable dynamics in Bacillus subtilis, whereby a key protein ComK, which drives a complex chain of reactions leading to bacterial competence, gets expressed rapidly in large quantities (competent state) before subsiding to low levels of expression (vegetative state). These rapid reactions suggest the application of an adiabatic approximation of the dynamics of the regulatory model that, however, lead to competence durations that are incorrect by a factor of 2. We apply a modified version of an iterative functional procedure that faithfully approximates the time-course of the trajectories in terms of a two-dimensional model involving proteins ComK and ComS. Furthermore, in order to describe the bimodal bivariate marginal probability distribution obtained from the Gillespie simulations of the CME, we introduce a tunable multiplicative noise term in a two-dimensional Langevin model whose stationary state is described by the time-independent solution of the corresponding Fokker--Planck equation.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ni:2016:PME, author = "Xumin Ni and Wei Guo and Kai Yuan and Xiong Yang and Zhiming Ma and Shuhua Xu and Shihua Zhang", title = "A Probabilistic Method for Estimating the Sharing of Identity by Descent for Populations with Migration", journal = j-TCBB, volume = "13", number = "2", pages = "281--290", month = mar, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2480074", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 12:53:43 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The inference of demographic history of populations is an important undertaking in population genetics. 
A few recent studies have developed identity-by-descent (IBD) based methods to reveal the signature of the relatively recent historical events. Notably, Pe'er and his colleagues have introduced a novel method (named PIBD here) by employing IBD sharing to infer effective population size and migration rate. However, under island model, PIBD neglects the coalescent information before the time to the most recent common ancestor (tMRCA) which leads to apparent deviations in certain situations. In this paper, we propose a new method, MIBD, by adopting a Markov process to describe the island model and develop a new formula for estimating IBD sharing. The new formula considers the coalescent information before tMRCA and the joint effect of the coalescent and migration events. We apply both MIBD and PIBD to the genome-wide data of two human populations (Palestinian and Bedouin) obtained from the HGDP-CEPH database, and demonstrate that MIBD is competitive to PIBD. Our simulation analyses also show that the results of MIBD are more accurate than those of PIBD especially in the case of small effective population size.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Hu:2016:RTN, author = "Yuanqi Hu and Pantelis Georgiou", title = "A Real-Time de novo {DNA} Sequencing Assembly Platform Based on an {FPGA} Implementation", journal = j-TCBB, volume = "13", number = "2", pages = "291--300", month = mar, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2442974", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 12:53:43 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This paper presents an FPGA based DNA comparison platform which can be run concurrently with the sensing phase of DNA sequencing and shortens the overall 
time needed for de novo DNA assembly. A hybrid overlap searching algorithm is applied which is scalable and can deal with incremental detection of new bases. To handle the incomplete data set which gradually increases during sequencing time, all-against-all comparisons are broken down into successive window-against-window comparison phases and executed using a novel dynamic suffix comparison algorithm combined with a partitioned dynamic programming method. The complete system has been designed to facilitate parallel processing in hardware, which allows real-time comparison and full scalability as well as a decrease in the number of computations required. A base pair comparison rate of 51.2 G/s is achieved when implemented on an FPGA with successful DNA comparison when using data sets from real genomes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Singh:2016:BAI, author = "Nitin Singh and Mathukumalli Vidyasagar", title = "{bLARS}: an Algorithm to Infer Gene Regulatory Networks", journal = j-TCBB, volume = "13", number = "2", pages = "301--314", month = mar, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2450740", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 12:53:43 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Inferring gene regulatory networks (GRNs) from high-throughput gene-expression data is an important and challenging problem in systems biology. Several existing algorithms formulate GRN inference as a regression problem. The available regression based algorithms are based on the assumption that all regulatory interactions are linear. However, nonlinear transcription regulation mechanisms are common in biology. 
In this work, we propose a new regression based method named bLARS that permits a variety of regulatory interactions from a predefined but otherwise arbitrary family of functions. On three DREAM benchmark datasets, namely gene expression data from E. coli, Yeast, and a synthetic data set, bLARS outperforms state-of-the-art algorithms in the terms of the overall score. On the individual networks, bLARS offers the best performance among currently available similar algorithms, namely algorithms that do not use perturbation information and are not meta-algorithms. Moreover, the presented approach can also be utilized for general feature selection problems in domains other than biology, provided they are of a similar structure.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Taliun:2016:FSB, author = "Daniel Taliun and Johann Gamper and Ulf Leser and Cristian Pattaro", title = "Fast Sampling-Based Whole-Genome Haplotype Block Recognition", journal = j-TCBB, volume = "13", number = "2", pages = "315--325", month = mar, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2456897", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 12:53:43 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Scaling linkage disequilibrium (LD) based haplotype block recognition to the entire human genome has always been a challenge. The best-known algorithm has quadratic runtime complexity and, even when sophisticated search space pruning is applied, still requires several days of computations. Here, we propose a novel sampling-based algorithm, called S-MIG$^{++}$, where the main idea is to estimate the area that most likely contains all haplotype blocks by sampling a very small number of SNP pairs.
A subsequent refinement step computes the exact blocks by considering only the SNP pairs within the estimated area. This approach significantly reduces the number of computed LD statistics, making the recognition of haplotype blocks very fast. We theoretically and empirically prove that the area containing all haplotype blocks can be estimated with a very high degree of certainty. Through experiments on the 243,080 SNPs on chromosome 20 from the 1,000 Genomes Project, we compared our previous algorithm MIG$^{++}$ with the new S-MIG$^{++}$ and observed a runtime reduction from 2.8 weeks to 34.8 hours. In a parallelized version of the S-MIG$^{++}$ algorithm using 32 parallel processes, the runtime was further reduced to 5.1 hours.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Sigdel:2016:FFS, author = "Madhu S. Sigdel and Madhav Sigdel and Semih Dinc and Imren Dinc and Marc L. Pusey and Ramazan S. Aygun", title = "{FocusALL}: Focal Stacking of Microscopic Images Using Modified {Harris} Corner Response Measure", journal = j-TCBB, volume = "13", number = "2", pages = "326--340", month = mar, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2459685", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 12:53:43 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Automated image analysis of microscopic images such as protein crystallization images and cellular images is one of the important research areas. If objects in a scene appear at different depths with respect to the camera's focal point, objects outside the depth of field usually appear blurred. Therefore, scientists capture a collection of images with different depths of field.
Focal stacking is a technique of creating a single focused image from a stack of images collected with different depths of field. In this paper, we introduce a novel focal stacking technique, FocusALL, which is based on our modified Harris Corner Response Measure. We also propose enhanced FocusALL for application on images collected under high resolution and varying illumination. FocusALL resolves problems related to the assumption that focus regions have high contrast and high intensity. Especially, FocusALL generates sharper boundaries around protein crystal regions and good in focus images for high resolution images in reasonable time. FocusALL outperforms other methods on protein crystallization images and performs comparably well on other datasets such as retinal epithelial images and simulated datasets.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Land:2016:MCS, author = "Tyler A. Land and Perry Fizzano and Robin B. Kodner", title = "Measuring Cluster Stability in a Large Scale Phylogenetic Analysis of Functional Genes in Metagenomes Using {{\tt pplacer}}", journal = j-TCBB, volume = "13", number = "2", pages = "341--349", month = mar, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2446470", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 12:53:43 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Analysis of metagenomic sequence data requires a multi-stage workflow. The results of each intermediate step possess an inherent uncertainty and potentially impact the as-yet-unmeasured statistical significance of downstream analyses. 
Here, we describe our phylogenetic analysis pipeline which uses the {\tt pplacer} program to place many shotgun sequences corresponding to a single functional gene onto a fixed phylogenetic tree. We then use the squash clustering method to compare multiple samples with respect to that gene. We approximate the statistical significance of each gene's clustering result by measuring its cluster stability, the consistency of that clustering result when the probabilistic placements made by {\tt pplacer} are systematically reassigned and then clustered again, as measured by the adjusted Rand Index. We find that among the genes investigated, the majority of analyses are stable, based on the average adjusted Rand Index. We investigated properties of each gene that may explain less stable results. These genes tended to have less convex reference trees, less total reads recruited to the gene, and a greater Expected Distance between Placement Locations as given by {\tt pplacer} when examined in aggregate. 
However, for an individual functional gene, these measures alone do not predict cluster stability.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ganesan:2016:PSC, author = "Narayan Ganesan and Jie Li and Vishakha Sharma and Hanyu Jiang and Adriana Compagnoni", title = "Process Simulation of Complex Biological Pathways in Physical Reactive Space and Reformulated for Massively Parallel Computing Platforms", journal = j-TCBB, volume = "13", number = "2", pages = "365--379", month = mar, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2443784", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 12:53:43 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Biological systems encompass complexity that far surpasses many artificial systems. Modeling and simulation of large and complex biochemical pathways is a computationally intensive challenge. Traditional tools, such as ordinary differential equations, partial differential equations, stochastic master equations, and Gillespie type methods, are all limited either by their modeling fidelity or computational efficiency or both. In this work, we present a scalable computational framework based on modeling biochemical reactions in explicit 3D space, that is suitable for studying the behavior of large and complex biological pathways. The framework is designed to exploit parallelism and scalability offered by commodity massively parallel processors such as the graphics processing units (GPUs) and other parallel computing platforms. The reaction modeling in 3D space is aimed at enhancing the realism of the model compared to traditional modeling tools and framework. 
We introduce the Parallel Select algorithm that is key to breaking the sequential bottleneck limiting the performance of most other tools designed to study biochemical interactions. The algorithm is designed to be computationally tractable, handle hundreds of interacting chemical species and millions of independent agents by considering all-particle interactions within the system. We also present an implementation of the framework on the popular graphics processing units and apply it to the simulation study of JAK-STAT Signal Transduction Pathway. The computational framework will offer a deeper insight into various biological processes within the cell and help us observe key events as they unfold in space and time. This will advance the current state-of-the-art in simulation study of large scale biological systems and also enable the realistic simulation study of macro-biological cultures, where inter-cellular interactions are prevalent.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Tu:2016:UFC, author = "Xudong Tu and Yuanliang Wang and Maolan Zhang and Jinchuan Wu", title = "Using Formal Concept Analysis to Identify Negative Correlations in Gene Expression Data", journal = j-TCBB, volume = "13", number = "2", pages = "380--391", month = mar, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2443805", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 12:53:43 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Recently, many biological studies reported that two groups of genes tend to show negatively correlated or opposite expression tendency in many biological processes or pathways. The negative correlation between genes may imply an important biological mechanism. 
In this study, we proposed a FCA-based negative correlation algorithm (NCFCA) that can effectively identify opposite expression tendency between two gene groups in gene expression data. After applying it to expression data of cell cycle-regulated genes in yeast, we found that six minichromosome maintenance family genes showed the opposite changing tendency with eight core histone family genes. Furthermore, we confirmed that the negative correlation expression pattern between these two families may be conserved in the cell cycle. Finally, we discussed the reasons underlying the negative correlation of six minichromosome maintenance (MCM) family genes with eight core histone family genes. Our results revealed that negative correlation is an important and potential mechanism that maintains the balance of biological systems by repressing some genes while inducing others. It can thus provide new understanding of gene expression and regulation, the causes of diseases, etc.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Liu:2016:CIB, author = "Jin-Xing Liu and Yong Xu and Ying-Lian Gao and Chun-Hou Zheng and Dong Wang and Qi Zhu", title = "A Class-Information-Based Sparse Component Analysis Method to Identify Differentially Expressed Genes on {RNA-Seq} Data", journal = j-TCBB, volume = "13", number = "2", pages = "392--398", month = mar, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2440265", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 12:53:43 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "With the development of deep sequencing technologies, many RNA-Seq data have been generated. 
Researchers have proposed many methods based on the sparse theory to identify the differentially expressed genes from these data. In order to improve the performance of sparse principal component analysis, in this paper, we propose a novel class-information-based sparse component analysis (CISCA) method which introduces the class information via a total scatter matrix. First, CISCA normalizes the RNA-Seq data by using a Poisson model to obtain their differential sections. Second, the total scatter matrix is gotten by combining the between-class and within-class scatter matrices. Third, we decompose the total scatter matrix by using singular value decomposition and construct a new data matrix by using singular values and left singular vectors. Then, aiming at obtaining sparse components, CISCA decomposes the constructed data matrix by solving an optimization problem with sparse constraints on loading vectors. Finally, the differentially expressed genes are identified by using the sparse loading vectors. 
The results on simulation and real RNA-Seq data demonstrate that our method is effective and suitable for analyzing these data.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Mozaffari-Kermani:2016:ISS, author = "Mehran Mozaffari-Kermani and Reza Azarderakhsh and Kui Ren and Jean-Luc Beuchat", title = "Introduction to the special section on emerging security trends for biomedical computations, devices, and infrastructures: guest editorial", journal = j-TCBB, volume = "13", number = "3", pages = "399--400", month = may, year = "2016", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Aug 29 06:50:39 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Unlike the traditional usage models for embedded systems security, nowadays, emerging computing systems are embedded in every aspect of human lives. One of the emerging usage models in which security is vital is deeply-embedded computing systems in human bodies, e.g., implantable and wearable medical devices. In addition to the security threats to traditional embedded systems, emerging deeply-embedded computing systems exhibit a larger attack surface, prone to more serious or life-threatening attacks. Biomedical deeply-embedded systems (deployed in human body, with computer programs sending and receiving medical data and performing data mining for the decisions) are currently getting developed with rapid rate and tremendous success.
Moreover, the security/privacy issues in every aspect of bioinformatics (algorithmic, statistical, and the like) including secure and private big data analytics, acquisition, and storage, privacy-preserving data mining for biomedicine, secure machine-learning of bioinformatics, and security of hardware and software systems used for biological databases are emerging given their unique constraints. Many of the systems for such computations will need to be transparently integrated into sensitive environments --- the consequent size and energy constraints imposed on any security solutions are extreme. Thus, unique challenges arise due to the sensitivity of computation processing, need for security in implementations, and assurance ``gaps.''", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Kocabas:2016:ESM, author = "Ovunc Kocabas and Tolga Soyata and Mehmet K. Aktas", title = "Emerging security mechanisms for medical cyber physical systems", journal = j-TCBB, volume = "13", number = "3", pages = "401--416", month = may, year = "2016", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Aug 29 06:50:39 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The following decade will witness a surge in remote health-monitoring systems that are based on body-worn monitoring devices. These Medical Cyber Physical Systems (MCPS) will be capable of transmitting the acquired data to a private or public cloud for storage and processing. Machine learning algorithms running in the cloud and processing this data can provide decision support to healthcare professionals. There is no doubt that the security and privacy of the medical data is one of the most important concerns in designing an MCPS. 
In this paper, we depict the general architecture of an MCPS consisting of four layers: data acquisition, data aggregation, cloud processing, and action. Due to the differences in hardware and communication capabilities of each layer, different encryption schemes must be used to guarantee data privacy within that layer. We survey conventional and emerging encryption schemes based on their ability to provide secure storage, data sharing, and secure computation. Our detailed experimental evaluation of each scheme shows that while the emerging encryption schemes enable exciting new features such as secure sharing and secure computation, they introduce several orders-of-magnitude computational and storage overhead. We conclude our paper by outlining future research directions to improve the usability of the emerging encryption schemes in an MCPS.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Chen:2016:AMU, author = "Cheng Chen and Fengchao Zhang and Jamie Barras and Kaspar Althoefer and Swarup Bhunia and Soumyajit Mandal", title = "Authentication of medicines using nuclear quadrupole resonance spectroscopy", journal = j-TCBB, volume = "13", number = "3", pages = "417--430", month = may, year = "2016", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Aug 29 06:50:39 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The production and sale of counterfeit and substandard pharmaceutical products, such as essential medicines, is an important global public health problem. We describe a chemometric passport-based approach to improve the security of the pharmaceutical supply chain. Our method is based on applying nuclear quadrupole resonance (NQR) spectroscopy to authenticate the contents of medicine packets. 
NQR is a non-invasive, non-destructive, and quantitative radio frequency (RF) spectroscopic technique. It is sensitive to subtle features of the solid-state chemical environment and thus generates unique chemical fingerprints that are intrinsically difficult to replicate. We describe several advanced NQR techniques, including two-dimensional measurements, polarization enhancement, and spin density imaging, that further improve the security of our authentication approach. We also present experimental results that confirm the specificity and sensitivity of NQR and its ability to detect counterfeit medicines.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Gong:2016:PDA, author = "Yanmin Gong and Yuguang Fang and Yuanxiong Guo", title = "Private data analytics on biomedical sensing data via distributed computation", journal = j-TCBB, volume = "13", number = "3", pages = "431--444", month = may, year = "2016", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Aug 29 06:50:39 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Advances in biomedical sensors and mobile communication technologies have fostered the rapid growth of mobile health (mHealth) applications in the past years. Users generate a high volume of biomedical data during health monitoring, which can be used by the mHealth server for training predictive models for disease diagnosis and treatment. However, the biomedical sensing data raise serious privacy concerns because they reveal sensitive information such as health status and lifestyles of the sensed subjects. This paper proposes and experimentally studies a scheme that keeps the training samples private while enabling accurate construction of predictive models. 
We specifically consider logistic regression models which are widely used for predicting dichotomous outcomes in healthcare, and decompose the logistic regression problem into small subproblems over two types of distributed sensing data, i.e., horizontally partitioned data and vertically partitioned data. The subproblems are solved using individual private data, and thus mHealth users can keep their private data locally and only upload (encrypted) intermediate results to the mHealth server for model training. Experimental results based on real datasets show that our scheme is highly efficient and scalable to a large number of mHealth users.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ali:2016:SAC, author = "Sk Subidh Ali and Mohamed Ibrahim and Ozgur Sinanoglu and Krishnendu Chakrabarty and Ramesh Karri", title = "Security assessment of cyberphysical digital microfluidic biochips", journal = j-TCBB, volume = "13", number = "3", pages = "445--458", month = may, year = "2016", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Aug 29 06:50:39 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A digital microfluidic biochip (DMFB) is an emerging technology that enables miniaturized analysis systems for point-of-care clinical diagnostics, DNA sequencing, and environmental monitoring. A DMFB reduces the rate of sample and reagent consumption, and automates the analysis of assays. In this paper, we provide the first assessment of the security vulnerabilities of DMFBs. We identify result-manipulation attacks on a DMFB that maliciously alter the assay outcomes. Two practical result-manipulation attacks are shown on a DMFB platform performing enzymatic glucose assay on serum. 
In the first attack, the attacker adjusts the concentration of the glucose sample and thereby modifies the final result. In the second attack, the attacker tampers with the calibration curve of the assay operation. We then identify denial-of-service attacks, where the attacker can disrupt the assay operation by tampering either with the droplet-routing algorithm or with the actuation sequence. We demonstrate these attacks using a digital microfluidic synthesis simulator. The results show that the attacks are easy to implement and hard to detect. Therefore, this work highlights the need for effective protections against malicious modifications in DMFBs.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Knox:2016:MFG, author = "David A. Knox and Robin D. Dowell", title = "A modeling framework for generation of positional and temporal simulations of transcriptional regulation", journal = j-TCBB, volume = "13", number = "3", pages = "459--471", month = may, year = "2016", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Aug 29 06:50:39 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We present a modeling framework aimed at capturing both the positional and temporal behavior of transcriptional regulatory proteins in eukaryotic cells. There is growing evidence that transcriptional regulation is the complex behavior that emerges not solely from the individual components, but rather from their collective behavior, including competition and cooperation. Our framework describes individual regulatory components using generic action oriented descriptions of their biochemical interactions with a DNA sequence. All the possible actions are based on the current state of factors bound to the DNA. 
We developed a rule builder to automatically generate the complete set of biochemical interaction rules for any given DNA sequence. Off-the-shelf stochastic simulation engines can model the behavior of a system of rules and the resulting changes in the configuration of bound factors can be visualized. We compared our model to experimental data at well-studied loci in yeast, confirming that our model captures both the positional and temporal behavior of transcriptional regulation.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Gu:2016:MSO, author = "Xu Gu", title = "A multi-state optimization framework for parameter estimation in biological systems", journal = j-TCBB, volume = "13", number = "3", pages = "472--482", month = may, year = "2016", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Aug 29 06:50:39 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Parameter estimation is a key concern for reliable and predictive models of biological systems. In this paper, we propose a multi-objective, multi-state optimization framework that allows multiple data sources to be incorporated into the parameter estimation process. This enables the model to better represent a diverse range of data from both within and without the training set; and to determine more biologically relevant parameter values for the model parameters. The framework is based on a multi-objective PSwarm implementation (MoPSwarm) and is validated via a case study on the ERK signalling pathway, in which significant advantages over the conventional single-state approach are demonstrated. 
Several variants of the framework are analyzed to determine the optimal configuration for convergence and solution quality.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Xie:2016:AHA, author = "Jiang Xie and Chaojuan Xiang and Jin Ma and Jun Tan and Tieqiao Wen and Jinzhi Lei and Qing Nie", title = "An adaptive hybrid algorithm for global network alignment", journal = j-TCBB, volume = "13", number = "3", pages = "483--493", month = may, year = "2016", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Aug 29 06:50:39 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "It is challenging to obtain reliable and optimal mapping between networks for alignment algorithms when both nodal and topological structures are taken into consideration due to the underlying NP-hard problem. Here, we introduce an adaptive hybrid algorithm that combines the classical Hungarian algorithm and the Greedy algorithm (HGA) for the global alignment of biomolecular networks. With this hybrid algorithm, every pair of nodes with one in each network is first aligned based on node information (e.g., their sequence attributes) and then followed by an adaptive and convergent iteration procedure for aligning the topological connections in the networks. For four well-studied protein interaction networks, i.e., C.elegans, yeast, D.melanogaster, and human, applications of HGA lead to improved alignments in acceptable running time. The mapping between yeast and human PINs obtained by the new algorithm has the largest value of common Gene Ontology (GO) terms compared to those obtained by other existing algorithms, while it still has lower Mean normalized entropy (MNE) and good performances on several other measures. 
Overall, the adaptive HGA is effective and capable of providing good mappings between aligned networks in which the biological properties of both the nodes and the connections are important.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", xxpages = "3:1--3:??", } @Article{Al-Dalky:2016:AMC, author = "Rami Al-Dalky and Kamal Taha and Dirar {Al Homouz} and Murad Qasaimeh", title = "Applying {Monte Carlo} simulation to biomedical literature to approximate genetic network", journal = j-TCBB, volume = "13", number = "3", pages = "494--504", month = may, year = "2016", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Aug 29 06:50:39 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Biologists often need to know the set of genes associated with a given set of genes or a given disease. We propose in this paper a classifier system called Monte Carlo for Genetic Network (MCforGN) that can construct genetic networks, identify functionally related genes, and predict gene-disease associations. MCforGN identifies functionally related genes based on their co-occurrences in the abstracts of biomedical literature. For a given gene g, the system first extracts the set of genes found within the abstracts of biomedical literature associated with g. It then ranks these genes to determine the ones with high co-occurrences with g. It overcomes the limitations of current approaches that employ analytical deterministic algorithms by applying Monte Carlo Simulation to approximate genetic networks. It does so by conducting repeated random sampling to obtain numerical results and to optimize these results. Moreover, it analyzes results to obtain the probabilities of different genes' co-occurrences using series of statistical tests. 
MCforGN can detect gene-disease associations by employing a combination of centrality measures (to identify the central genes in disease-specific genetic networks) and Monte Carlo Simulation. MCforGN aims at enhancing state-of-the-art biological text mining by applying novel extraction techniques. We evaluated MCforGN by comparing it experimentally with nine approaches. Results showed marked improvement.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Pavelka:2016:CAA, author = "Antonin Pavelka and Eva Sebestova and Barbora Kozlikova and Jan Brezovsky and Jiri Sochor and Jiri Damborsky", title = "{CAVER}: algorithms for analyzing dynamics of tunnels in macromolecules", journal = j-TCBB, volume = "13", number = "3", pages = "505--517", month = may, year = "2016", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Aug 29 06:50:39 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The biological function of a macromolecule often requires that a small molecule or ion is transported through its structure. The transport pathway often leads through void spaces in the structure. The properties of transport pathways change significantly in time; therefore, the analysis of a trajectory from molecular dynamics rather than of a single static structure is needed for understanding the function of pathways. The identification and analysis of transport pathways are challenging because of the high complexity and diversity of macromolecular shapes, the thermal motion of their atoms, and the large amount of conformations needed to properly describe conformational space of protein structure. 
In this paper, we describe the principles of the CAVER 3.0 algorithms for the identification and analysis of properties of transport pathways both in static and dynamic structures. Moreover, we introduce the improved clustering solution for finding tunnels in macromolecules, which is included in the latest CAVER 3.02 version. Voronoi diagrams are used to identify potential pathways in each snapshot of a molecular dynamics trajectory and clustering is then used to find the correspondence between tunnels from different snapshots. Furthermore, the geometrical properties of pathways and their evolution in time are computed and visualized.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Puljiz:2016:DGV, author = "Zrinka Puljiz and Haris Vikalo", title = "Decoding genetic variations: communications-inspired haplotype assembly", journal = j-TCBB, volume = "13", number = "3", pages = "518--530", month = may, year = "2016", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Aug 29 06:50:39 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "High-throughput DNA sequencing technologies allow fast and affordable sequencing of individual genomes and thus enable unprecedented studies of genetic variations. Information about variations in the genome of an individual is provided by haplotypes, ordered collections of single nucleotide polymorphisms. Knowledge of haplotypes is instrumental in finding genes associated with diseases, drug development, and evolutionary studies. Haplotype assembly from high-throughput sequencing data is challenging due to errors and limited lengths of sequencing reads. 
The key observation made in this paper is that the minimum error-correction formulation of the haplotype assembly problem is identical to the task of deciphering a coded message received over a noisy channel---a classical problem in the mature field of communication theory. Exploiting this connection, we develop novel haplotype assembly schemes that rely on the bit-flipping and belief propagation algorithms often used in communication systems. The latter algorithm is then adapted to the haplotype assembly of polyploids. We demonstrate on both simulated and experimental data that the proposed algorithms compare favorably with state-of-the-art haplotype assembly methods in terms of accuracy, while being scalable and computationally efficient.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Li:2016:EDP, author = "Cong Li and Can Yang and Greg Hather and Ray Liu and Hongyu Zhao", title = "Efficient drug-pathway association analysis via integrative penalized matrix decomposition", journal = j-TCBB, volume = "13", number = "3", pages = "531--540", month = may, year = "2016", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Aug 29 06:50:39 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/matlab.bib; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Traditional drug discovery practice usually follows the ``one drug --- one target'' approach, seeking to identify drug molecules that act on individual targets, which ignores the systemic nature of human diseases. Pathway-based drug discovery recently emerged as an appealing approach to overcome this limitation. An important first step of such pathway-based drug discovery is to identify associations between drug molecules and biological pathways. 
This task has been made feasible by the accumulating data from high-throughput transcription and drug sensitivity profiling. In this paper, we developed ``iPaD'', an integrative Penalized Matrix Decomposition method to identify drug-pathway associations through jointly modeling of such high-throughput transcription and drug sensitivity data. A scalable biconvex optimization algorithm was implemented and gave iPaD tremendous advantage in computational efficiency over current state-of-the-art method, which allows it to handle the evergrowing large-scale data sets that current method cannot afford to. On two widely used real data sets, iPaD also significantly outperformed the current method in terms of the number of validated drug-pathway associations that were identified. The Matlab code of our algorithm publicly available at http://licong-jason.github.io/iPaD/", acknowledgement = ack-nhfb, articleno = "2", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", xxpages = "2:1--2:??", } @Article{Macwan:2016:RSC, author = "Isaac G. Macwan and Zihe Zhao and Omar T. Sobh and Ishita Mukerji and Bhushan Dharmadhikari and Prabir K. Patra", title = "Residue specific and chirality dependent interactions between carbon nanotubes and flagellin", journal = j-TCBB, volume = "13", number = "3", pages = "541--548", month = may, year = "2016", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Aug 29 06:50:39 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Flagellum is a lash-like cellular appendage found in many single-celled living organisms. The flagellin protofilaments contain 11-helix dual turn structure in a single flagellum. Each flagellin consists of four sub-domains --- two inner domains (D0, D1) and two outer domains (D2, D3). 
While inner domains predominantly consist of $ \alpha $-helices, the outer domains are primarily beta sheets with D3. In flagellum, the outermost sub-domain is the only one that is exposed to the native environment. This study focuses on the interactions of the residues of D3 of an R-type flagellin with 5nm long chiral (5, 15) and arm-chair (12, 12) single-walled carbon nanotubes (SWNT) using molecular dynamics simulation. It presents the interactive forces between the SWNT and the residues of D3 from the perspectives of size and chirality of the SWNT. It is found that the metallic (arm-chair) SWNT interacts the most with glycine and threonine residues through van der Waals and hydrophobic interactions, whereas the semiconducting (chiral) SWNT interacts largely with the area of protein devoid of glycine by van der Waals, hydrophobic interactions, and hydrogen bonding. This indicates a crucial role that glycine plays in distinguishing metallic from semiconducting SWNTs.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Liang:2016:NMD, author = "Cheng Liang and Yue Li and Jiawei Luo", title = "A novel method to detect functional {microRNA} regulatory modules by bicliques merging", journal = j-TCBB, volume = "13", number = "3", pages = "549--556", month = may, year = "2016", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Aug 29 06:50:39 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "MicroRNAs (miRNAs) are post-transcriptional regulators that repress the expression of their targets. They are known to work cooperatively with genes and play important roles in numerous cellular processes. 
Identification of miRNA regulatory modules (MRMs) would aid deciphering the combinatorial effects derived from the many-to-many regulatory relationships in complex cellular systems. Here, we develop an effective method called BiCliques Merging (BCM) to predict MRMs based on bicliques merging. By integrating the miRNA/mRNA expression profiles from The Cancer Genome Atlas (TCGA) with the computational target predictions, we construct a weighted miRNA regulatory network for module discovery. The maximal bicliques detected in the network are statistically evaluated and filtered accordingly. We then employed a greedy-based strategy to iteratively merge the remaining bicliques according to their overlaps together with edge weights and the gene-gene interactions. Comparing with existing methods on two cancer datasets from TCGA, we showed that the modules identified by our method are more densely connected and functionally enriched. Moreover, our predicted modules are more enriched for miRNA families and the miRNA-mRNA pairs within the modules are more negatively correlated. Finally, several potential prognostic modules are revealed by Kaplan--Meier survival analysis and breast cancer subtype analysis. 
Availability: BCM is implemented in Java and available for download in the supplementary materials, which can be found on the Computer Society Digital Library at http://doi.ieeecomputersociety.org/10.1109/TCBB.2015.2462370.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Teng:2016:EGP, author = "Ben Teng and Can Yang and Jiming Liu and Zhipeng Cai and Xiang Wan", title = "Exploring the genetic patterns of complex diseases via the integrative genome-wide approach", journal = j-TCBB, volume = "13", number = "3", pages = "557--564", month = may, year = "2016", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Aug 29 06:50:39 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/matlab.bib; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Genome-wide association studies (GWASs), which assay more than a million single nucleotide polymorphisms (SNPs) in thousands of individuals, have been widely used to identify genetic risk variants for complex diseases. However, most of the variants that have been identified contribute relatively small increments of risk and only explain a small portion of the genetic variation in complex diseases. This is the so-called missing heritability problem. Evidence has indicated that many complex diseases are genetically related, meaning these diseases share common genetic risk variants. Therefore, exploring the genetic correlations across multiple related studies could be a promising strategy for removing spurious associations and identifying underlying genetic risk variants, and thereby uncovering the mystery of missing heritability in complex diseases. We present a general and robust method to identify genetic patterns from multiple large-scale genomic datasets. 
We treat the summary statistics as a matrix and demonstrate that genetic patterns will form a low-rank matrix plus a sparse component. Hence, we formulate the problem as a matrix recovering problem, where we aim to discover risk variants shared by multiple diseases/traits and those for each individual disease/trait. We propose a convex formulation for matrix recovery and an efficient algorithm to solve the problem. We demonstrate the advantages of our method using both synthesized datasets and real datasets. The experimental results show that our method can successfully reconstruct both the shared and the individual genetic patterns from summary statistics and achieve comparable performances compared with alternative methods under a wide range of scenarios. The MATLAB code is available at:http://www.comp.hkbu.edu.hk/~xwan/iga.zip.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Mirzaei:2016:FCN, author = "Sajad Mirzaei and Yufeng Wu", title = "Fast construction of near parsimonious hybridization networks for multiple phylogenetic trees", journal = j-TCBB, volume = "13", number = "3", pages = "565--570", month = may, year = "2016", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Aug 29 06:50:39 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Hybridization networks represent plausible evolutionary histories of species that are affected by reticulate evolutionary processes. An established computational problem on hybridization networks is constructing the most parsimonious hybridization network such that each of the given phylogenetic trees (called gene trees) is ``displayed'' in the network. There have been several previous approaches, including an exact method and several heuristics, for this NP-hard problem. 
However, the exact method is only applicable to a limited range of data, and heuristic methods can be less accurate and also slow sometimes. In this paper, we develop a new algorithm for constructing near parsimonious networks for multiple binary gene trees. This method is more efficient for large numbers of gene trees than previous heuristics. This new method also produces more parsimonious results on many simulated datasets as well as a real biological dataset than a previous method. We also show that our method produces topologically more accurate networks for many datasets.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ren:2016:FRA, author = "Hai-Peng Ren and Xiao-Na Huang and Jia-Xuan Hao", title = "Finding robust adaptation gene regulatory networks using multi-objective genetic algorithm", journal = j-TCBB, volume = "13", number = "3", pages = "571--577", month = may, year = "2016", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Aug 29 06:50:39 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Robust adaptation plays a key role in gene regulatory networks, and it is thought to be an important attribute for the organic or cells to survive in fluctuating conditions. In this paper, a simplified three-node enzyme network is modeled by the Michaelis--Menten rate equations for all possible topologies, and a family of topologies and the corresponding parameter sets of the network with satisfactory adaptation are obtained using the multi-objective genetic algorithm. The proposed approach improves the computation efficiency significantly as compared to the time consuming exhaustive searching method. 
This approach provides a systemic way for searching the feasible topologies and the corresponding parameter sets to make the gene regulatory networks have robust adaptation. The proposed methodology, owing to its universality and simplicity, can be used to address more complex issues in biological networks.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{He:2016:MMI, author = "Dan He and Irina Rish and David Haws and Laxmi Parida", title = "{MINT}: mutual information based transductive feature selection for genetic trait prediction", journal = j-TCBB, volume = "13", number = "3", pages = "578--583", month = may, year = "2016", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Aug 29 06:50:39 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Whole genome prediction of complex phenotypic traits using high-density genotyping arrays has attracted a lot of attention, as it is relevant to the fields of plant and animal breeding and genetic epidemiology. Since the number of genotypes is generally much bigger than the number of samples, predictive models suffer from the curse of dimensionality. The curse of dimensionality problem not only affects the computational efficiency of a particular genomic selection method, but can also lead to a poor performance, mainly due to possible overfitting, or un-informative features. In this work, we propose a novel transductive feature selection method, called MINT, which is based on the MRMR (Max-Relevance and Min-Redundancy) criterion. 
We apply MINT on genetic trait prediction problems and show that, in general, MINT is a better feature selection method than the state-of-the-art inductive method MRMR.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Stamoulis:2016:OSD, author = "Catherine Stamoulis and Rebecca A. Betensky", title = "Optimization of signal decomposition matched filtering {(SDMF)} for improved detection of copy-number variations", journal = j-TCBB, volume = "13", number = "3", pages = "584--591", month = may, year = "2016", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Aug 29 06:50:39 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We aim to improve the performance of the previously proposed signal decomposition matched filtering (SDMF) method [26] for the detection of copy-number variations (CNV) in the human genome. Through simulations we show that the modified SDMF is robust even at high noise levels and outperforms the original SDMF method, which indirectly depends on CNV frequency. Simulations are also used to develop a systematic approach for selecting relevant parameter thresholds in order to optimize sensitivity, specificity and computational efficiency. We apply the modified method to array CGH data from normal samples in The Cancer Genome Atlas (TCGA) and compare detected CNVs to those estimated using Circular Binary Segmentation (CBS) [19], a Hidden Markov Model (HMM)-based approach [11] and a subset of CNVs in the Database of Genomic Variants. 
We show that a substantial number of previously identified CNVs are detected by the optimized SDMF, which also outperforms all other methods.", acknowledgement = ack-nhfb, articleno = "1", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", xxpages = "1:1--1:??", } @Article{Liu:2016:PSE, author = "Yongchao Liu and Thomas Hankeln and Bertil Schmidt", title = "Parallel and space-efficient construction of {Burrows--Wheeler} transform and suffix array for big genome data", journal = j-TCBB, volume = "13", number = "3", pages = "592--598", month = may, year = "2016", CODEN = "ITCBCY", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Aug 29 06:50:39 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Next-generation sequencing technologies have led to the sequencing of more and more genomes, propelling 
related research into the era of big data. In this paper, we present ParaBWT, a parallelized Burrows--Wheeler transform (BWT) and suffix array construction algorithm for big genome data. In ParaBWT, we have investigated a progressive construction approach to constructing the BWT of single genome sequences in linear space complexity, but with a small constant factor. This approach has been further parallelized using multi-threading based on a master-slave coprocessing model. After gaining the BWT, the suffix array is constructed in a memory-efficient manner. The performance of ParaBWT has been evaluated using two sequences generated from two human genome assemblies: the Ensembl Homo sapiens assembly and the human reference genome. Our performance comparison to FMD-index and Bwt-disk reveals that on 12 CPU cores, ParaBWT runs up to $ 2.2 \times $ faster than FMD-index and up to $ 99.0 \times $ faster than Bwt-disk. BWT construction algorithms for very long genomic sequences are time consuming and (due to their incremental nature) inherently difficult to parallelize. Thus, their parallelization is challenging and even relatively small speedups like the ones of our method over FMD-index are of high importance to research. 
ParaBWT is written in C++, and is freely available at http://parabwt.sourceforge.net.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Wang:2016:NEA,
  author =       "Biing-Feng Wang",
  title =        "A New Efficient Algorithm for the All Sorting Reversals Problem with No Bad Components",
  journal =      j-TCBB,
  volume =       "13",
  number =       "4",
  pages =        "599--609",
  month =        jul,
  year =         "2016",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2015.2476803",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sat Oct 8 09:42:35 MDT 2016",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "The problem of finding all reversals that take a permutation one step closer to a target permutation is called the all sorting reversals problem (the ASR problem). For this problem, Siepel had an $ O(n^3) $-time algorithm. Most complications of his algorithm stem from some peculiar structures called bad components. Since bad components are very rare in both real and simulated data, it is practical to study the ASR problem with no bad components. For the ASR problem with no bad components, Swenson et al. gave an $ O(n^2) $-time algorithm. Very recently, Swenson found that their algorithm does not always work. In this paper, a new algorithm is presented for the ASR problem with no bad components.
The time complexity is $ O(n^2) $ in the worst case and is linear in the size of input and output in practice.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Ji:2016:DFM,
  author =       "Junzhong Ji and Jiawei Luo and Cuicui Yang and Aidong Zhang",
  title =        "Detecting Functional Modules Based on a Multiple-Grain Model in Large-Scale Protein-Protein Interaction Networks",
  journal =      j-TCBB,
  volume =       "13",
  number =       "4",
  pages =        "610--622",
  month =        jul,
  year =         "2016",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2015.2480066",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sat Oct 8 09:42:35 MDT 2016",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Detecting functional modules from a Protein-Protein Interaction (PPI) network is a fundamental and hot issue in proteomics research, where many computational approaches have played an important role in recent years. However, how to effectively and efficiently detect functional modules in large-scale PPI networks is still a challenging problem. We present a new framework, based on a multiple-grain model of PPI networks, to detect functional modules in PPI networks. First, we give a multiple-grain representation model of a PPI network, which has a smaller scale with super nodes. Next, we design the protein grain partitioning method, which employs a functional similarity or a structural similarity to merge some proteins layer by layer. Thirdly, a refining mechanism with border node tests is proposed to address the protein overlapping of different modules during the grain eliminating process. Finally, systematic experiments are conducted on five large-scale yeast and human networks.
The results show that the framework not only significantly reduces the running time of functional module detection, but also effectively identifies overlapping modules while keeping some competitive performances, thus it is highly competent to detect functional modules in large-scale PPI networks.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Tran:2016:AOP,
  author =       "Ngoc Hieu Tran and Xin Chen",
  title =        "{AMAS}: Optimizing the Partition and Filtration of Adaptive Seeds to Speed up Read Mapping",
  journal =      j-TCBB,
  volume =       "13",
  number =       "4",
  pages =        "623--633",
  month =        jul,
  year =         "2016",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2015.2465900",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sat Oct 8 09:42:35 MDT 2016",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Read mapping is a key task in next-generation sequencing (NGS) data analysis. To achieve an optimal combination of accuracy, speed, and low memory footprint, popular mapping tools often focus on identifying one or a few best mapping locations for each read. However, for many downstream analyses such as prediction of genomic variants or protein binding motifs located in repeat regions, isoform expression quantification, metagenomics analysis, it is more desirable to have a comprehensive set of all possible mapping locations of NGS reads. In this paper, we introduce AMAS, a read mapping tool that exhaustively searches for possible mapping locations of NGS reads in a reference sequence within a given edit distance. AMAS features improvements of the mapping, partition, and filtration of adaptive seeds to speed up the read mapping.
Performance results on simulated and real data sets show that AMAS run several times faster than other state-of-the-art read mappers while achieving similar sensitivity and accuracy. AMAS is implemented in C++ and is freely available at https://sourceforge.net/projects/ngsamas/.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Jiang:2016:UGB,
  author =       "Zhenchao Jiang and Lishuang Li and Degen Huang",
  title =        "An Unsupervised Graph Based Continuous Word Representation Method for Biomedical Text Mining",
  journal =      j-TCBB,
  volume =       "13",
  number =       "4",
  pages =        "634--642",
  month =        jul,
  year =         "2016",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2015.2478467",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sat Oct 8 09:42:35 MDT 2016",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "In biomedical text mining tasks, distributed word representation has succeeded in capturing semantic regularities, but most of them are shallow-window based models, which are not sufficient for expressing the meaning of words. To represent words using deeper information, we make explicit the semantic regularity to emerge in word relations, including dependency relations and context relations, and propose a novel architecture for computing continuous vector representation by leveraging those relations. The performance of our model is measured on word analogy task and Protein-Protein Interaction Extraction (PPIE) task.
Experimental results show that our method performs overall better than other word representation models on word analogy task and have many advantages on biomedical text mining.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Catanzaro:2016:CPD,
  author =       "Daniele Catanzaro and Stanley E. Shackney and Alejandro A. Sch{\"a}ffer and Russell Schwartz",
  title =        "Classifying the Progression of Ductal Carcinoma from Single-Cell Sampled Data via Integer Linear Programming: a Case Study",
  journal =      j-TCBB,
  volume =       "13",
  number =       "4",
  pages =        "643--655",
  month =        jul,
  year =         "2016",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2015.2476808",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sat Oct 8 09:42:35 MDT 2016",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Ductal Carcinoma In Situ (DCIS) is a precursor lesion of Invasive Ductal Carcinoma (IDC) of the breast. Investigating its temporal progression could provide fundamental new insights for the development of better diagnostic tools to predict which cases of DCIS will progress to IDC. We investigate the problem of reconstructing a plausible progression from single-cell sampled data of an individual with synchronous DCIS and IDC. Specifically, by using a number of assumptions derived from the observation of cellular atypia occurring in IDC, we design a possible predictive model using integer linear programming (ILP). Computational experiments carried out on a preexisting data set of 13 patients with simultaneous DCIS and IDC show that the corresponding predicted progression models are classifiable into categories having specific evolutionary characteristics.
The approach provides new insights into mechanisms of clonal progression in breast cancers and helps illustrate the power of the ILP approach for similar problems in reconstructing tumor evolution scenarios under complex sets of constraints.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Xi:2016:DRC,
  author =       "Jianing Xi and Ao Li",
  title =        "Discovering Recurrent Copy Number Aberrations in Complex Patterns via Non-Negative Sparse Singular Value Decomposition",
  journal =      j-TCBB,
  volume =       "13",
  number =       "4",
  pages =        "656--668",
  month =        jul,
  year =         "2016",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2015.2474404",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sat Oct 8 09:42:35 MDT 2016",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Recurrent copy number aberrations (RCNAs) in multiple cancer samples are strongly associated with tumorigenesis, and RCNA discovery is helpful to cancer research and treatment. Despite the emergence of numerous RCNA discovering methods, most of them are unable to detect RCNAs in complex patterns that are influenced by complicating factors including aberration in partial samples, co-existing of gains and losses and normal-like tumor samples. Here, we propose a novel computational method, called non-negative sparse singular value decomposition (NN-SSVD), to address the RCNA discovering problem in complex patterns. In NN-SSVD, the measurement of RCNA is based on the aberration frequency in a part of samples rather than all samples, which can circumvent the complexity of different RCNA patterns.
We evaluate NN-SSVD on synthetic dataset by comparison on detection scores and Receiver Operating Characteristics curves, and the results show that NN-SSVD outperforms existing methods in RCNA discovery and demonstrate more robustness to RCNA complicating factors. Applying our approach on a breast cancer dataset, we successfully identify a number of genomic regions that are strongly correlated with previous studies, which harbor a bunch of known breast cancer associated genes.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Li:2016:EBE,
  author =       "Lishuang Li and Shanshan Liu and Meiyue Qin and Yiwen Wang and Degen Huang",
  title =        "Extracting Biomedical Event with Dual Decomposition Integrating Word Embeddings",
  journal =      j-TCBB,
  volume =       "13",
  number =       "4",
  pages =        "669--677",
  month =        jul,
  year =         "2016",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2015.2476876",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sat Oct 8 09:42:35 MDT 2016",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Extracting biomedical event from literatures has attracted much attention recently. By now, most of the state-of-the-art systems have been based on pipelines which suffer from cascading errors, and the words encoded by one-hot are unable to represent the semantic information. Joint inference with dual decomposition and novel word embeddings are adopted to address the two problems, respectively, in this work. Word embeddings are learnt from large scale unlabeled texts and integrated as an unsupervised feature into other rich features based on dependency parse graphs to detect triggers and arguments.
The proposed system consists of four components: trigger detector, argument detector, jointly inference with dual decomposition, and rule-based semantic post-processing, and outperforms the state-of-the-art systems. On the development set of BioNLP'09, the F-score is 59.77 percent on the primary task, which is 0.96 percent higher than the best system. On the test set of BioNLP'11, the F-score is 56.09 and 0.89 percent higher than the best published result that do not adopt additional techniques. On the test set of BioNLP'13, the F-score reaches 53.19 percent which is 2.22 percent higher than the best result.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Guan:2016:EBH,
  author =       "Benjamin X. Guan and Bir Bhanu and Prue Talbot and Nikki Jo-Hao Weng",
  title =        "Extraction of Blebs in Human Embryonic Stem Cell Videos",
  journal =      j-TCBB,
  volume =       "13",
  number =       "4",
  pages =        "678--688",
  month =        jul,
  year =         "2016",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2015.2480091",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sat Oct 8 09:42:35 MDT 2016",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Blebbing is an important biological indicator in determining the health of human embryonic stem cells (hESC). Especially, areas of a bleb sequence in a video are often used to distinguish two cell blebbing behaviors in hESC: dynamic and apoptotic blebbings. This paper analyzes various segmentation methods for bleb extraction in hESC videos and introduces a bio-inspired score function to improve the performance in bleb extraction. Full bleb formation consists of bleb expansion and retraction. Blebs change their size and image properties dynamically in both processes and between frames. Therefore, adaptive parameters are needed for each segmentation method.
A score function derived from the change of bleb area and orientation between consecutive frames is proposed which provides adaptive parameters for bleb extraction in videos. In comparison to manual analysis, the proposed method provides an automated fast and accurate approach for bleb sequence extraction.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Elmsallati:2016:GAP,
  author =       "Ahed Elmsallati and Connor Clark and Jugal Kalita",
  title =        "Global Alignment of Protein-Protein Interaction Networks: a Survey",
  journal =      j-TCBB,
  volume =       "13",
  number =       "4",
  pages =        "689--705",
  month =        jul,
  year =         "2016",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2015.2474391",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sat Oct 8 09:42:35 MDT 2016",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "In this paper, we survey algorithms that perform global alignment of networks or graphs. Global network alignment aligns two or more given networks to find the best mapping from nodes in one network to nodes in other networks. Since graphs are a common method of data representation, graph alignment has become important with many significant applications. Protein-protein interactions can be modeled as networks and aligning these networks of protein interactions has many applications in biological research. In this survey, we review algorithms for global pairwise alignment highlighting various proposed approaches, and classify them based on their methodology. Evaluation metrics that are used to measure the quality of the resulting alignments are also surveyed. We discuss and present a comparison between selected aligners on the same datasets and evaluate using the same evaluation metrics.
Finally, a quick overview of the most popular databases of protein interaction networks is presented focusing on datasets that have been used recently.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Wan:2016:MMP,
  author =       "Shibiao Wan and Man-Wai Mak and Sun-Yuan Kung",
  title =        "{Mem-mEN}: Predicting Multi-Functional Types of Membrane Proteins by Interpretable Elastic Nets",
  journal =      j-TCBB,
  volume =       "13",
  number =       "4",
  pages =        "706--718",
  month =        jul,
  year =         "2016",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2015.2474407",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sat Oct 8 09:42:35 MDT 2016",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Membrane proteins play important roles in various biological processes within organisms. Predicting the functional types of membrane proteins is indispensable to the characterization of membrane proteins. Recent studies have extended to predicting single- and multi-type membrane proteins. However, existing predictors perform poorly and more importantly, they are often lack of interpretability. To address these problems, this paper proposes an efficient predictor, namely Mem-mEN, which can produce sparse and interpretable solutions for predicting membrane proteins with single- and multi-label functional types. Given a query membrane protein, its associated gene ontology (GO) information is retrieved by searching a compact GO-term database with its homologous accession number, which is subsequently classified by a multi-label elastic net (EN) classifier. Experimental results show that Mem-mEN significantly outperforms existing state-of-the-art membrane-protein predictors. Moreover, by using Mem-mEN, 338 out of more than 7,900 GO terms are found to play more essential roles in determining the functional types.
Based on these 338 essential GO terms, Mem-mEN can not only predict the functional type of a membrane protein, but also explain why it belongs to that type. For the reader's convenience, the Mem-mEN server is available online at http://bioinfo.eie.polyu.edu.hk/MemmENServer/.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Dalton:2016:ORB,
  author =       "Lori A. Dalton",
  title =        "Optimal {ROC}-Based Classification and Performance Analysis under {Bayesian} Uncertainty Models",
  journal =      j-TCBB,
  volume =       "13",
  number =       "4",
  pages =        "719--729",
  month =        jul,
  year =         "2016",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2015.2465966",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sat Oct 8 09:42:35 MDT 2016",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Popular tools to evaluate classifier performance are the false positive rate (FPR), true positive rate (TPR), receiver operator characteristic (ROC) curve, and area under the curve (AUC). Typically, these quantities are estimated from training data using simple resampling and counting methods, which have been shown to perform poorly when the sample size is small, as is typical in many applications. This work takes a model-based approach in classifier training and performance analysis, where we assume the true population densities are members of an uncertainty class of distributions. Given a prior over the uncertainty class and data, we form a posterior and derive optimal mean-squared-error (MSE) FPR and TPR estimators, as well as the sample-conditioned MSE performance of these estimators. The theory also naturally leads to optimal ROC and AUC estimators. Finally, we develop a Neyman--Pearson-based approach to optimal classifier design, which maximizes the estimated TPR for a given estimated FPR.
These tools are optimal over the uncertainty class of distributions given the sample, and are available in closed form or can be easily approximated for many models. Applications are demonstrated on both synthetic and real genomic data. MATLAB code and simulations results are available in the online supplementary material.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Azuma:2016:PAC,
  author =       "Shun-ichi Azuma and Katsuya Owaki and Nobuhiro Shinohara and Toshiharu Sugie",
  title =        "Performance Analysis of Chemotaxis Controllers: Which has Better Chemotaxis Controller, \bioname{Escherichia coli} or \bioname{Paramecium caudatum}?",
  journal =      j-TCBB,
  volume =       "13",
  number =       "4",
  pages =        "730--741",
  month =        jul,
  year =         "2016",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2015.2474397",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sat Oct 8 09:42:35 MDT 2016",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Chemotaxis is the biological phenomenon in which organisms move to a more favorable location in an environment with a chemical attractant or repellent. Since chemotaxis is a typical example of the environmental response of organisms, it is a fundamental topic in biology and related fields. We discuss the performance of the internal controllers that generate chemotaxis. We first propose performance indices to evaluate the controllers. Based on these indices, we evaluate the performance of two controller models of Escherichia coli and Paramecium caudatum. As a result, it is disclosed that the E. coli-type controller achieves chemotaxis quickly but roughly, whereas the P. caudatum-type controller achieves it slowly but precisely.
This result will be a biological contribution from a control theoretic point of view.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Marhon:2016:PPC,
  author =       "Sajid A. Marhon and Stefan C. Kremer",
  title =        "Prediction of Protein Coding Regions Using a Wide-Range Wavelet Window Method",
  journal =      j-TCBB,
  volume =       "13",
  number =       "4",
  pages =        "742--753",
  month =        jul,
  year =         "2016",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2015.2476789",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sat Oct 8 09:42:35 MDT 2016",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Prediction of protein coding regions is an important topic in the field of genomic sequence analysis. Several spectrum-based techniques for the prediction of protein coding regions have been proposed. However, the outstanding issue in most of the proposed techniques is that these techniques depend on an experimentally-selected, predefined value of the window length. In this paper, we propose a new Wide-Range Wavelet Window (WRWW) method for the prediction of protein coding regions. The analysis of the proposed wavelet window shows that its frequency response can adapt its width to accommodate the change in the window length so that it can allow or prevent frequencies other than the basic frequency in the analysis of DNA sequences. This feature makes the proposed window capable of analyzing DNA sequences with a wide range of the window lengths without degradation in the performance. The experimental analysis of applying the WRWW method and other spectrum-based methods to five benchmark datasets has shown that the proposed method outperforms other methods along a wide range of the window lengths.
In addition, the experimental analysis has shown that the proposed method is dominant in the prediction of both short and long exons.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Zhao:2016:PBN,
  author =       "Yun-Bo Zhao and J. Krishnan",
  title =        "Probabilistic {Boolean} Network Modelling and Analysis Framework for {mRNA} Translation",
  journal =      j-TCBB,
  volume =       "13",
  number =       "4",
  pages =        "754--766",
  month =        jul,
  year =         "2016",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2015.2478477",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sat Oct 8 09:42:35 MDT 2016",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "mRNA translation is a complex process involving the progression of ribosomes on the mRNA, resulting in the synthesis of proteins, and is subject to multiple layers of regulation. This process has been modelled using different formalisms, both stochastic and deterministic. Recently, we introduced a Probabilistic Boolean modelling framework for mRNA translation, which possesses the advantage of tools for numerically exact computation of steady state probability distribution, without requiring simulation. Here, we extend this model to incorporate both random sequential and parallel update rules, and demonstrate its effectiveness in various settings, including its flexibility in accommodating additional static and dynamic biological complexities and its role in parameter sensitivity analysis. In these applications, the results from the model analysis match those of TASEP model simulations. Importantly, the proposed modelling framework maintains the stochastic aspects of mRNA translation and provides a way to exactly calculate probability distributions, providing additional tools of analysis in this context.
Finally, the proposed modelling methodology provides an alternative approach to the understanding of the mRNA translation process, by bridging the gap between existing approaches, providing new analysis tools, and contributing to a more robust platform for modelling and understanding translation.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Chang:2016:RGR,
  author =       "Young Hwan Chang and Roel Dobbe and Palak Bhushan and Joe W. Gray and Claire J. Tomlin",
  title =        "Reconstruction of Gene Regulatory Networks Based on Repairing Sparse Low-Rank Matrices",
  journal =      j-TCBB,
  volume =       "13",
  number =       "4",
  pages =        "767--777",
  month =        jul,
  year =         "2016",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2015.2465952",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sat Oct 8 09:42:35 MDT 2016",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "With the growth of high-throughput proteomic data, in particular time series gene expression data from various perturbations, a general question that has arisen is how to organize inherently heterogeneous data into meaningful structures. Since biological systems such as breast cancer tumors respond differently to various treatments, little is known about exactly how these gene regulatory networks (GRNs) operate under different stimuli. Challenges due to the lack of knowledge not only occur in modeling the dynamics of a GRN but also cause bias or uncertainties in identifying parameters or inferring the GRN structure. This paper describes a new algorithm which enables us to estimate bias error due to the effect of perturbations and correctly identify the common graph structure among biased inferred graph structures. To do this, we retrieve common dynamics of the GRN subject to various perturbations.
We refer to the task as ``repairing'' inspired by ``image repairing'' in computer vision. The method can automatically correctly repair the common graph structure across perturbed GRNs, even without precise information about the effect of the perturbations. We evaluate the method on synthetic data sets and demonstrate an application to the DREAM data sets and discuss its implications to experiment design.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Tang:2016:RMC,
  author =       "Yang Tang and Huijun Gao and Wei Du and Jianquan Lu and Athanasios V. Vasilakos and J{\"u}rgen Kurths",
  title =        "Robust Multiobjective Controllability of Complex Neuronal Networks",
  journal =      j-TCBB,
  volume =       "13",
  number =       "4",
  pages =        "778--791",
  month =        jul,
  year =         "2016",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2015.2485226",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sat Oct 8 09:42:35 MDT 2016",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "This paper addresses robust multiobjective identification of driver nodes in the neuronal network of a cat's brain, in which uncertainties in determination of driver nodes and control gains are considered. A framework for robust multiobjective controllability is proposed by introducing interval uncertainties and optimization algorithms. By appropriate definitions of robust multiobjective controllability, a robust nondominated sorting adaptive differential evolution (NSJaDE) is presented by means of the nondominated sorting mechanism and the adaptive differential evolution (JaDE).
The simulation experimental results illustrate the satisfactory performance of NSJaDE for robust multiobjective controllability, in comparison with six statistical methods and two multiobjective evolutionary algorithms (MOEAs): nondominated sorting genetic algorithms II (NSGA-II) and nondominated sorting composite differential evolution. It is revealed that the existence of uncertainties in choosing driver nodes and designing control gains heavily affects the controllability of neuronal networks. We also unveil that driver nodes play a more drastic role than control gains in robust controllability. The developed NSJaDE and obtained results will shed light on the understanding of robustness in controlling realistic complex networks such as transportation networks, power grid networks, biological networks, etc.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Li:2016:MMH,
  author =       "Yifeng Li and Haifen Chen and Jie Zheng and Alioune Ngom",
  title =        "The Max-Min High-Order Dynamic {Bayesian} Network for Learning Gene Regulatory Networks with Time-Delayed Regulations",
  journal =      j-TCBB,
  volume =       "13",
  number =       "4",
  pages =        "792--803",
  month =        jul,
  year =         "2016",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2015.2474409",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sat Oct 8 09:42:35 MDT 2016",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Accurately reconstructing gene regulatory network (GRN) from gene expression data is a challenging task in systems biology. Although some progresses have been made, the performance of GRN reconstruction still has much room for improvement. Because many regulatory events are asynchronous, learning gene interactions with multiple time delays is an effective way to improve the accuracy of GRN reconstruction.
Here, we propose a new approach, called Max-Min high-order dynamic Bayesian network (MMHO-DBN) by extending the Max-Min hill-climbing Bayesian network technique originally devised for learning a Bayesian network's structure from static data. Our MMHO-DBN can explicitly model the time lags between regulators and targets in an efficient manner. It first uses constraint-based ideas to limit the space of potential structures, and then applies search-and-score ideas to search for an optimal HO-DBN structure. The performance of MMHO-DBN to GRN reconstruction was evaluated using both synthetic and real gene expression time-series data. Results show that MMHO-DBN is more accurate than current time-delayed GRN learning methods, and has an intermediate computing performance. Furthermore, it is able to learn long time-delayed relationships between genes. We applied sensitivity analysis on our model to study the performance variation along different parameter settings. The result provides hints on the setting of parameters of MMHO-DBN.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Jian:2016:MKF,
  author =       "Ling Jian and Zhonghang Xia and Xinnan Niu and Xijun Liang and Parimal Samir and Andrew J. Link",
  title =        "$ \ell_2 $ Multiple Kernel Fuzzy {SVM}-Based Data Fusion for Improving Peptide Identification",
  journal =      j-TCBB,
  volume =       "13",
  number =       "4",
  pages =        "804--809",
  month =        jul,
  year =         "2016",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2015.2480084",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sat Oct 8 09:42:35 MDT 2016",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "SEQUEST is a database-searching engine, which calculates the correlation score between observed spectrum and theoretical spectrum deduced from protein sequences stored in a flat text file, even though it is not a relational and object-oriental repository. Nevertheless, the SEQUEST score functions fail to discriminate between true and false PSMs accurately. Some approaches, such as PeptideProphet and Percolator, have been proposed to address the task of distinguishing true and false PSMs. However, most of these methods employ time-consuming learning algorithms to validate peptide assignments [1]. In this paper, we propose a fast algorithm for validating peptide identification by incorporating heterogeneous information from SEQUEST scores and peptide digested knowledge. To automate the peptide identification process and incorporate additional information, we employ $ \ell_2 $ multiple kernel learning (MKL) to implement the current peptide identification task.
Results on experimental datasets indicate that compared with state-of-the-art methods, i.e., PeptideProphet and Percolator, our data fusing strategy has comparable performance but reduces the running time significantly.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wang:2016:GES, author = "Chao Wang and Hong Yu and Aili Wang and Kai Xia", title = "Guest Editorial for Special Section on Big Data Computing and Processing in Computational Biology and Bioinformatics", journal = j-TCBB, volume = "13", number = "5", pages = "810--811", month = sep, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2581460", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Dec 30 16:19:30 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The papers in this special section focus on big data computing in the field of bioinformatics and biocomputing. Big data has emerged as an important application field which has shown its huge impact in different scientific research domains. In particular, the big data bioinformatics applications such as DNA sequence analysis have posed significant challenges to the state-of-the-art processing and computing systems. With the growing explosive data scale, the collection, storage, retrieval, processing, scheduling, and visualization are key big data issues to be tackled. 
Up to now, many researchers have been seeking high-level parallelism using novel big data computing architectures and processing mechanisms.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Moore:2016:EMF, author = "Erin Jessica Moore and Thirimachos Bourlai", title = "Expectation Maximization of Frequent Patterns, a Specific, Local, Pattern-Based Biclustering Algorithm for Biological Datasets", journal = j-TCBB, volume = "13", number = "5", pages = "812--824", month = sep, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2510011", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Dec 30 16:19:30 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Currently, binary biclustering algorithms are too slow and non-specific to handle biological datasets that have a large number of attributes, which is essential for the computational biology problem of microarray analysis. Specialized computers may be needed to execute an algorithm, and may fail to produce a solution, due to its large resource needs. The biclusters also include too many false positives, the type I error, which hinders biological discovery. We propose an algorithm that can analyze datasets with a large attribute set at different densities, and can operate on a laptop, which makes it accessible to practitioners. EMFP produces biclusters that have a very low Root Mean Squared Error and false positive rate, with very few type II errors. Our binary biclustering algorithm is a hybrid, axis-parallel, pattern-based algorithm that finds multiple, non-overlapping, near-constant, deterministic, binary submatrices, with a variable confidence threshold, and the novel use of local density comparisons versus the standard global threshold.
EMFP introduces a new, and intuitive way to calculate internal measures for binary biclustering methods. We also introduce a framework to ease comparison with other algorithms, and compare to both binary and general biclustering algorithms using two real, and 80 synthetic databases.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhang:2016:IGM, author = "Ya Zhang and Ao Li and Chen Peng and Minghui Wang", title = "Improve Glioblastoma Multiforme Prognosis Prediction by Using Feature Selection and Multiple Kernel Learning", journal = j-TCBB, volume = "13", number = "5", pages = "825--835", month = sep, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2551745", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Dec 30 16:19:30 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Glioblastoma multiforme GBM is a highly aggressive type of brain cancer with very low median survival. In order to predict the patient's prognosis, researchers have proposed rules to classify different glioma cancer cell subtypes. However, survival time of different subtypes of GBM is often various due to different individual basis. Recent development in gene testing has evolved classic subtype rules to more specific classification rules based on single biomolecular features. These classification methods are proven to perform better than traditional simple rules in GBM prognosis prediction. However, the real power behind the massive data is still under covered. We believe a combined prediction model based on more than one data type could perform better, which will contribute further to clinical treatment of GBM.
The Cancer Genome Atlas TCGA database provides huge dataset with various data types of many cancers that enables us to inspect this aggressive cancer in a new way. In this research, we have improved GBM prognosis prediction accuracy further by taking advantage of the minimum redundancy feature selection method mRMR and Multiple Kernel Machine MKL learning method. Our goal is to establish an integrated model which could predict GBM prognosis with high accuracy.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Xu:2016:IDA, author = "Xiaoyi Xu and Minghui Wang", title = "Inferring Disease Associated Phosphorylation Sites via Random Walk on Multi-Layer Heterogeneous Network", journal = j-TCBB, volume = "13", number = "5", pages = "836--844", month = sep, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2498548", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Dec 30 16:19:30 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "As protein phosphorylation plays an important role in numerous cellular processes, many studies have been undertaken to analyze phosphorylation-related activities for drug design and disease treatment. However, although progresses have been made in illustrating the relationship between phosphorylation and diseases, no existing method focuses on disease-associated phosphorylation sites prediction. In this work, we proposed a multi-layer heterogeneous network model that makes use of the kinase information to infer disease-phosphorylation site relationship and implemented random walk on the heterogeneous network. 
Experimental results reveal that multi-layer heterogeneous network model with kinase layer is superior in inferring disease-phosphorylation site relationship when comparing with existing random walk model and common used classification methods.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ling:2016:MTH, author = "Cheng Ling and Tsuyoshi Hamada and Jingyang Gao and Guoguang Zhao and Donghong Sun and Weifeng Shi", title = "{MrBayes tgMC$^3$++}: a High Performance and Resource-Efficient {GPU}-Oriented Phylogenetic Analysis Method", journal = j-TCBB, volume = "13", number = "5", pages = "845--854", month = sep, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2495202", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Dec 30 16:19:30 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "MrBayes is a widespread phylogenetic inference tool harnessing empirical evolutionary models and Bayesian statistics. However, the computational cost on the likelihood estimation is very expensive, resulting in undesirably long execution time. Although a number of multi-threaded optimizations have been proposed to speed up MrBayes, there are bottlenecks that severely limit the GPU thread-level parallelism of likelihood estimations. This study proposes a high performance and resource-efficient method for GPU-oriented parallelization of likelihood estimations. Instead of having to rely on empirical programming, the proposed novel decomposition storage model implements high performance data transfers implicitly. In terms of performance improvement, a speedup factor of up to 178 can be achieved on the analysis of simulated datasets by four Tesla K40 cards.
In comparison to the other publicly available GPU-oriented MrBayes, the tgMC$^3$++ method proposed herein outperforms the tgMC$^3$ v1.0, nMC$^3$ v2.1.1 and oMC$^3$ v1.00 methods by speedup factors of up to 1.6, 1.9 and 2.9, respectively. Moreover, tgMC$^3$++ supports more evolutionary models and gamma categories, which previous GPU-oriented methods fail to take into analysis.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhang:2016:MCS, author = "Jingsong Zhang and Yinglin Wang and Chao Zhang and Yongyong Shi", title = "Mining Contiguous Sequential Generators in Biological Sequences", journal = j-TCBB, volume = "13", number = "5", pages = "855--867", month = sep, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2495132", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Dec 30 16:19:30 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The discovery of conserved sequential patterns in biological sequences is essential to unveiling common shared functions. Mining sequential generators as well as mining closed sequential patterns can contribute to a more concise result set than mining all sequential patterns, especially in the analysis of big data in bioinformatics. Previous studies have also presented convincing arguments that the generator is preferable to the closed pattern in inductive inference and classification. However, classic sequential generator mining algorithms, due to the lack of consideration on the contiguous constraint along with the lower-closed one, still pose a great challenge at spawning a large number of inefficient and redundant patterns, which is too huge for effective usage.
Driven by some extensive applications of patterns with contiguous feature, we propose ConSgen, an efficient algorithm for discovering contiguous sequential generators. It adopts the n-gram model, called shingles, to generate potential frequent subsequences and leverages several pruning techniques to prune the unpromising parts of search space. And then, the contiguous sequential generators are identified by using the equivalence class-based lower-closure checking scheme. Our experiments on both DNA and protein data sets demonstrate the compactness, efficiency, and scalability of ConSgen.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Hua:2016:GSM, author = "Keru Hua and Qin Yu and Ruiming Zhang", title = "A Guaranteed Similarity Metric Learning Framework for Biological Sequence Comparison", journal = j-TCBB, volume = "13", number = "5", pages = "868--877", month = sep, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2495186", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Dec 30 16:19:30 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Similarity of sequences is a key mathematical notion for Classification and Phylogenetic studies in Biology. The distance and similarity between two sequences are very important and widely studied. During the last decades, similarity (distance) metric learning is one of the hottest topics of machine learning/data mining as well as their applications in the bioinformatics field. It is feasible to introduce machine learning technology to learn similarity metric from biological data. In this paper, we propose a novel framework of guaranteed similarity metric learning (GMSL) to perform alignment of biology sequences in any feature vector space.
It introduces the $ (\epsilon, \gamma, \tau) $-goodness similarity theory to Mahalanobis metric learning. As a theoretical guaranteed similarity metric learning approach, GMSL guarantees that the learned similarity function performs well in classification and clustering. Our experiments on the most used datasets demonstrate that our approach outperforms the state-of-the-art biological sequences alignment methods and other similarity metric learning algorithms in both accuracy and stability.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Thorvaldsen:2016:MMF, author = "Steinar Thorvaldsen", title = "A Mutation Model from First Principles of the Genetic Code", journal = j-TCBB, volume = "13", number = "5", pages = "878--886", month = sep, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2489641", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Dec 30 16:19:30 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The paper presents a neutral Codons Probability Mutations (CPM) model of molecular evolution and genetic decay of an organism. The CPM model uses a Markov process with a 20-dimensional state space of probability distributions over amino acids. The transition matrix of the Markov process includes the mutation rate and those single point mutations compatible with the genetic code. This is an alternative to the standard Point Accepted Mutation (PAM) and BLOcks of amino acid SUbstitution Matrix (BLOSUM). Genetic decay is quantified as a similarity between the amino acid distribution of proteins from a group of species on one hand, and the equilibrium distribution of the Markov chain on the other.
Amino acid data for the eukaryote, bacterium, and archaea families are used to illustrate how both the CPM and PAM models predict their genetic decay towards the equilibrium value of 1. A family of bacteria is studied in more detail. It is found that warm environment organisms on average have a higher degree of genetic decay compared to those species that live in cold environments. The paper addresses a new codon-based approach to quantify genetic decay due to single point mutations compatible with the genetic code. The present work may be seen as a first approach to use codon-based Markov models to study how genetic entropy increases with time in an effectively neutral biological regime. Various extensions of the model are also discussed.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Hao:2016:NMU, author = "Xiao-Hu Hao and Gui-Jun Zhang and Xiao-Gen Zhou and Xu-Feng Yu", title = "A Novel Method Using Abstract Convex Underestimation in Ab-Initio Protein Structure Prediction for Guiding Search in Conformational Feature Space", journal = j-TCBB, volume = "13", number = "5", pages = "887--900", month = sep, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2497226", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Dec 30 16:19:30 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "To address the searching problem of protein conformational space in ab-initio protein structure prediction, a novel method using abstract convex underestimation ACUE based on the framework of evolutionary algorithm was proposed. 
Computing such conformations, essential to associate structural and functional information with gene sequences, is challenging due to the high-dimensionality and rugged energy surface of the protein conformational space. As a consequence, the dimension of protein conformational space should be reduced to a proper level. In this paper, the high-dimensionality original conformational space was converted into feature space whose dimension is considerably reduced by feature extraction technique. And, the underestimate space could be constructed according to abstract convex theory. Thus, the entropy effect caused by searching in the high-dimensionality conformational space could be avoided through such conversion. The tight lower bound estimate information was obtained to guide the searching direction, and the invalid searching area in which the global optimal solution is not located could be eliminated in advance. Moreover, instead of expensively calculating the energy of conformations in the original conformational space, the estimate value is employed to judge if the conformation is worth exploring to reduce the evaluation time, thereby making computational cost lower and the searching process more efficient. Additionally, fragment assembly and the Monte Carlo method are combined to generate a series of metastable conformations by sampling in the conformational space. The proposed method provides a novel technique to solve the searching problem of protein conformational space. Twenty small-to-medium structurally diverse proteins were tested, and the proposed ACUE method was compared with ItFix, HEA, Rosetta and the developed method LEDE without underestimate information.
Test results show that the ACUE method can more rapidly and more efficiently obtain the near-native protein structure.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Chen:2016:SBD, author = "Peng Chen and ShanShan Hu and Jun Zhang and Xin Gao and Jinyan Li and Junfeng Xia and Bing Wang", title = "A Sequence-Based Dynamic Ensemble Learning System for Protein Ligand-Binding Site Prediction", journal = j-TCBB, volume = "13", number = "5", pages = "901--912", month = sep, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2505286", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Dec 30 16:19:30 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Background: Proteins have the fundamental ability to selectively bind to other molecules and perform specific functions through such interactions, such as protein-ligand binding. Accurate prediction of protein residues that physically bind to ligands is important for drug design and protein docking studies. Most of the successful protein-ligand binding predictions were based on known structures. However, structural information is not largely available in practice due to the huge gap between the number of known protein sequences and that of experimentally solved structures. Results: This paper proposes a dynamic ensemble approach to identify protein-ligand binding residues by using sequence information only. To avoid problems resulting from highly imbalanced samples between the ligand-binding sites and non ligand-binding sites, we constructed several balanced data sets and we trained a random forest classifier for each of them. We dynamically selected a subset of classifiers according to the similarity between the target protein and the proteins in the training data set. 
The combination of the predictions of the classifier subset to each query protein target yielded the final predictions. The ensemble of these classifiers formed a sequence-based predictor to identify protein-ligand binding sites. Conclusions: Experimental results on two Critical Assessment of protein Structure Prediction datasets and the ccPDB dataset demonstrated that of our proposed method compared favorably with the state-of-the-art. Availability: http://www2.ahu.edu.cn/pchen/web/LigandDSES.htm", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Disanto:2016:APN, author = "Filippo Disanto and Noah A. Rosenberg", title = "Asymptotic Properties of the Number of Matching Coalescent Histories for Caterpillar-Like Families of Species Trees", journal = j-TCBB, volume = "13", number = "5", pages = "913--925", month = sep, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2485217", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Dec 30 16:19:30 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Coalescent histories provide lists of species tree branches on which gene tree coalescences can take place, and their enumerative properties assist in understanding the computational complexity of calculations central in the study of gene trees and species trees. Here, we solve an enumerative problem left open by Rosenberg (IEEE/ACM Transactions on Computational Biology and Bioinformatics 10: 1253--1262, 2013) concerning the number of coalescent histories for gene trees and species trees with a matching labeled topology that belongs to a generic caterpillar-like family.
By bringing a generating function approach to the study of coalescent histories, we prove that for any caterpillar-like family with seed tree $t$, the sequence $ (h_n)_{n \ge 0} $ describing the number of matching coalescent histories of the $n$th tree of the family grows asymptotically as a constant multiple of the Catalan numbers. Thus, $ h_n \sim \beta_t c_n $, where the asymptotic constant $ \beta_t > 0$ depends on the shape of the seed tree $t$. The result extends a claim demonstrated only for seed trees with at most eight taxa to arbitrary seed trees, expanding the set of cases for which detailed enumerative properties of coalescent histories can be determined. We introduce a procedure that computes from $t$ the constant $ \beta_t$ as well as the algebraic expression for the generating function of the sequence $ (h_n)_{n \ge 0} $.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Nogueira:2016:BBW, author = "David Nogueira and Pedro Tomas and Nuno Roma", title = "{BowMapCL}: {Burrows--Wheeler} Mapping on Multiple Heterogeneous Accelerators", journal = j-TCBB, volume = "13", number = "5", pages = "926--938", month = sep, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2495149", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Dec 30 16:19:30 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The computational demand of exact-search procedures has pressed the exploitation of parallel processing accelerators to reduce the execution time of many applications. However, this often imposes strict restrictions in terms of the problem size and implementation efforts, mainly due to their possibly distinct architectures.
To circumvent this limitation, a new exact-search alignment tool (BowMapCL) based on the Burrows--Wheeler Transform and FM-Index is presented. Contrasting to other alternatives, BowMapCL is based on a unified implementation using OpenCL, allowing the exploitation of multiple and possibly different devices (e.g., NVIDIA, AMD/ATI, and Intel GPUs/APUs). Furthermore, to efficiently exploit such heterogeneous architectures, BowMapCL incorporates several techniques to promote its performance and scalability, including multiple buffering, work-queue task-distribution, and dynamic load-balancing, together with index partitioning, bit-encoding, and sampling. When compared with state-of-the-art tools, the attained results showed that BowMapCL using a single GPU is $ 2 \times $ to $ 7.5 \times $ faster than mainstream multi-threaded CPU BWT-based aligners, like Bowtie, BWA, and SOAP2; and up to $ 4 \times $ faster than the best performing state-of-the-art GPU implementations (namely, SOAP3 and HPG-BWT).
When multiple and completely distinct devices are considered, BowMapCL efficiently scales the offered throughput, ensuring a convenient load-balance of the involved processing in the several distinct devices.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Shi:2016:OEM, author = "Yan Shi and Jinglong Niu and Zhixin Cao and Maolin Cai and Jian Zhu and Weiqing Xu", title = "Online Estimation Method for Respiratory Parameters Based on a Pneumatic Model", journal = j-TCBB, volume = "13", number = "5", pages = "939--946", month = sep, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2497225", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Dec 30 16:19:30 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Mechanical ventilation is an important method to help people breathe. Respiratory parameters of ventilated patients are usually tracked for pulmonary diagnostics and respiratory treatment assessment. In this paper, to improve the estimation accuracy of respiratory parameters, a pneumatic model for mechanical ventilation was proposed. Furthermore, based on the mathematical model, a recursive least-squares algorithm was adopted to estimate the respiratory parameters. 
Finally, through experimental and numerical study, it was demonstrated that the proposed estimation method was effective and the method can be used in pulmonary diagnostics and treatment.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Li:2016:PSS, author = "Liqi Li and Jinhui Li and Weidong Xiao and Yongsheng Li and Yufang Qin and Shiwen Zhou and Hua Yang", title = "Prediction the Substrate Specificities of Membrane Transport Proteins Based on Support Vector Machine and Hybrid Features", journal = j-TCBB, volume = "13", number = "5", pages = "947--953", month = sep, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2495140", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Dec 30 16:19:30 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Membrane transport proteins and their substrate specificities play crucial roles in a variety of cellular functions. Identifying the substrate specificities of membrane transport proteins is closely related to the protein-target interaction prediction, drug design, membrane recruitment, and dysregulation analysis. However, experimental methods to this aim are time consuming, labor intensive, and costly. Therefore, we proposed a novel method basing on support vector machine (SVM) to predict substrate specificities of membrane transport proteins by integrating features from position-specific score matrix (PSSM), PROFEAT, and Gene Ontology (GO). Finally, jackknife cross-validation tests were adopted on a benchmark and independent datasets to measure the performance of the proposed method. The overall accuracy of 96.16 and 80.45 percent were obtained for two datasets, which are higher from 2.12 to 20.44 percent than that by the state-of-the-art tool.
Comparison results indicate that the proposed model is more reliable and efficient for accurate prediction the substrate specificities of membrane transport proteins.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Axenopoulos:2016:SSF, author = "Apostolos Axenopoulos and Dimitrios Rafailidis and Georgios Papadopoulos and Elias N. Houstis and Petros Daras", title = "Similarity Search of Flexible {$3$D} Molecules Combining Local and Global Shape Descriptors", journal = j-TCBB, volume = "13", number = "5", pages = "954--970", month = sep, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2498553", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Dec 30 16:19:30 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In this paper, a framework for shape-based similarity search of 3D molecular structures is presented. The proposed framework exploits simultaneously the discriminative capabilities of a global, a local, and a hybrid local-global shape feature to produce a geometric descriptor that achieves higher retrieval accuracy than each feature does separately. Global and hybrid features are extracted using pairwise computations of diffusion distances between the points of the molecular surface, while the local feature is based on accumulating pairwise relations among oriented surface points into local histograms. The local features are integrated into a global descriptor vector using the bag-of-features approach. Due to the intrinsic property of its constituting shape features to be invariant to articulations of the 3D objects, the framework is appropriate for similarity search of flexible 3D molecules, while at the same time it is also accurate in retrieving rigid 3D molecules. 
The proposed framework is evaluated in flexible and rigid shape matching of 3D protein structures as well as in shape-based virtual screening of large ligand databases with quite promising results.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ang:2016:SUS, author = "Jun Chin Ang and Andri Mirzal and Habibollah Haron and Haza Nuzly Abdull Hamed", title = "Supervised, Unsupervised, and Semi-Supervised Feature Selection: a Review on Gene Selection", journal = j-TCBB, volume = "13", number = "5", pages = "971--989", month = sep, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2478454", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Dec 30 16:19:30 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Recently, feature selection and dimensionality reduction have become fundamental tools for many data mining tasks, especially for processing high-dimensional data such as gene expression microarray data. Gene expression microarray data comprises up to hundreds of thousands of features with relatively small sample size. Because learning algorithms usually do not work well with this kind of data, a challenge to reduce the data dimensionality arises. A huge number of gene selection are applied to select a subset of relevant features for model construction and to seek for better cancer classification performance. This paper presents the basic taxonomy of feature selection, and also reviews the state-of-the-art gene selection methods by grouping the literatures into three categories: supervised, unsupervised, and semi-supervised. 
The comparison of experimental results on top 5 representative gene expression datasets indicates that the classification accuracy of unsupervised and semi-supervised feature selection is competitive with supervised feature selection.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Liu:2016:NMP, author = "Yuanning Liu and Qi Zhao and Hao Zhang and Rui Xu and Yang Li and Liyan Wei", title = "A New Method to Predict {RNA} Secondary Structure Based on {RNA} Folding Simulation", journal = j-TCBB, volume = "13", number = "5", pages = "990--995", month = sep, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2496347", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Dec 30 16:19:30 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "RNA plays an important role in various biological processes; hence, it is essential when determining the functions of RNA to research its secondary structures. So far, the accuracy of RNA secondary structure prediction remains an area in need of improvement. This paper presents a novel method for predicting RNA secondary structure based on an RNA folding simulation model. This model assumes that the process of RNA folding from the random coil state to full structure is staged and in every stage of folding, the final state of an RNA is determined by the optimal combination of helical regions, which are urgently essential to dynamics of RNA formation. This paper proposes the First Large Free Energy Difference (FLED) in order to find the helical regions most urgently needed for optimal final state formation among all the possible helical regions.
Tests on the datasets with known structures from public databases demonstrate that our method can outperform other current RNA secondary structure prediction methods in terms of prediction accuracy.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Kwon:2016:DRA, author = "Yung-Keun Kwon and Junil Kim and Kwang-Hyun Cho", title = "Dynamical Robustness against Multiple Mutations in Signaling Networks", journal = j-TCBB, volume = "13", number = "5", pages = "996--1002", month = sep, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2495251", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Dec 30 16:19:30 MST 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "It has been known that the robust behavior of a cellular signaling network is strongly related to the structural characteristics of the network, such as connectivity, the number of feedback loops, and the number of feed-forward loops. Previous studies proved such relationships through dynamical simulations of various random network models. Most of them, however, focused on robustness against a single node mutation. Considering that complex diseases such as cancer are mostly caused by simultaneous dysfunction of multiple genes, it is needed to investigate the robustness of a network against multiple node mutations. In this paper, we investigated the robustness of a network against multiple node mutations through extensive simulations on the basis of Boolean network models. We found that the robustness against multiple mutations is, in most cases, weaker than the robustness against a single node mutation on average. Moreover, we found that the robustness against multiple mutations is strongly positively correlated with the robustness against single mutation. 
The difference between the multiple- and single-mutation robustness became larger as the number of mutated nodes increased or the number of nodes that are robust to single-mutation decreased. We further found that a node of relatively large connectivity or being involved with many feedback loops tends to be non-robust against multiple mutations. This finding is supported by the observation that poly-genic disease genes have high connectivity and are involved with a large number of feedback loops than mono-genic disease genes in a human signaling network. Together, our study shows that previous studies for a single node mutation can be extended to understand the network dynamics for multiple node mutations.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhao:2016:DMS, author = "Xing-Ming Zhao", title = "Data Mining in Systems Biology", journal = j-TCBB, volume = "13", number = "6", pages = "1003--1003", month = nov, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2617698", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 13 12:30:49 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Tang:2016:NAF, author = "Jian Tang and Shuigeng Zhou", title = "A New Approach for Feature Selection from Microarray Data Based on Mutual Information", journal = j-TCBB, volume = "13", number = "6", pages = "1004--1015", month = nov, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2515582", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 13 12:30:49 MDT 2017", bibsource =
"http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Mutual information (MI) is a powerful concept for correlation-centric applications. It has been used for feature selection from microarray gene expression data in many works. One of the merits of MI is that, unlike many other heuristic methods, it is based on a mature theoretic foundation. When applied to microarray data, however, it faces some challenges. First, due to the large number of features (i.e., genes) present in microarray data, the true distributions for the expression values of some genes may be distorted by noise. Second, evaluating inter-group mutual information requires estimating multi-variate distributions, which is quite difficult if not impossible. To address these problems, in this paper, we propose a new MI-based feature selection approach for microarray data. Our approach relies on two strategies: one is relevance boosting, which requires a desirable feature to show substantially additional relevance with class labeling beyond the already selected features, the other is feature interaction enhancing, which probabilistically compensates for feature interaction missing from simple aggregation-based evaluation. We justify our approach from both theoretical perspective and experimental results.
We use a synthetic dataset to show the statistical significance of the proposed strategies, and real-life datasets to show the improved performance of our approach over the existing methods.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Yin:2016:EPT, author = "Xi Yin and Ying-Ying Xu and Hong-Bin Shen", title = "Enhancing the Prediction of Transmembrane {$ \beta $}-Barrel Segments with Chain Learning and Feature Sparse Representation", journal = j-TCBB, volume = "13", number = "6", pages = "1016--1026", month = nov, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2528000", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 13 12:30:49 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Transmembrane $ \beta $-barrels (TMBs) are one important class of membrane proteins that play crucial functions in the cell. Membrane proteins are difficult wet-lab targets of structural biology, which call for accurate computational prediction approaches. Here, we developed a novel method named MemBrain-TMB to predict the spanning segments of transmembrane $ \beta $-barrel from amino acid sequence. MemBrain-TMB is a statistical machine learning-based model, which is constructed using a new chain learning algorithm with input features encoded by the image sparse representation approach. We considered the relative status information between neighboring residues for enhancing the performance, and the matrix of features was translated into feature image by sparse coding algorithm for noise and dimension reduction. To deal with the diverse loop length problem, we applied a dynamic threshold method, which is particularly useful for enhancing the recognition of short loops and tight turns.
Our experiments demonstrate that the new protocol designed in MemBrain-TMB effectively helps improve prediction performance.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Qin:2016:IDA, author = "Gui-Min Qin and Rui-Yi Li and Xing-Ming Zhao", title = "Identifying Disease Associated {miRNAs} Based on Protein Domains", journal = j-TCBB, volume = "13", number = "6", pages = "1027--1035", month = nov, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2515608", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 13 12:30:49 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "MicroRNAs (miRNAs) are a class of small endogenous non-coding genes, acting as regulators in the post-transcriptional processes. Recently, the miRNAs are found to be widely involved in different types of diseases. Therefore, the identification of disease associated miRNAs can help understand the mechanisms that underlie the disease and identify new biomarkers. However, it is not easy to identify the miRNAs related to diseases due to its extensive involvements in various biological processes. In this work, we present a new approach to identify disease associated miRNAs based on domains, the functional and structural blocks of proteins. The results on real datasets demonstrate that our method can effectively identify disease related miRNAs with high precision.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wu:2016:NBM, author = "Hao Wu and Lin Gao and Nikola K.
Kasabov", title = "Network-Based Method for Inferring Cancer Progression at the Pathway Level from Cross-Sectional Mutation Data", journal = j-TCBB, volume = "13", number = "6", pages = "1036--1044", month = nov, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2520934", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 13 12:30:49 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Large-scale cancer genomics projects are providing a wealth of somatic mutation data from a large number of cancer patients. However, it is difficult to obtain several samples with a temporal order from one patient in evaluating the cancer progression. Therefore, one of the most challenging problems arising from the data is to infer the temporal order of mutations across many patients. To solve the problem efficiently, we present a Network-based method (NetInf) to Infer cancer progression at the pathway level from cross-sectional data across many patients, leveraging on the exclusive property of driver mutations within a pathway and the property of linear progression between pathways. To assess the robustness of NetInf, we apply it on simulated data with the addition of different levels of noise. To verify the performance of NetInf, we apply it to analyze somatic mutation data from three real cancer studies with large number of samples. Experimental results reveal that the pathways detected by NetInf show significant enrichment.
Our method reduces computational complexity by constructing gene networks without assigning the number of pathways, which also provides new insights on the temporal order of somatic mutations at the pathway level rather than at the gene level.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Fabris:2016:EEC, author = "Fabio Fabris and Alex A. Freitas and Jennifer M. A. Tullet", title = "An Extensive Empirical Comparison of Probabilistic Hierarchical Classifiers in Datasets of Ageing-Related Genes", journal = j-TCBB, volume = "13", number = "6", pages = "1045--1058", month = nov, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2505288", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 13 12:30:49 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This study comprehensively evaluates the performance of five types of probabilistic hierarchical classification methods used for predicting Gene Ontology (GO) terms related to ageing. Of those tested, a new hybrid of a Local Hierarchical Classifier (LHC) and the Predictive Clustering Tree algorithm (LHC-PCT) had the best predictive accuracy results. We also tested the impact of two types of variations in most hierarchical classification algorithms, namely: (a) changing the base algorithm (we tested Naive Bayes and Support Vector Machines), and the impact of (b) using or not the Correlation based Feature Selection (CFS) algorithm in a pre-processing step. In total, we evaluated the predictive performance of 17 variations of hierarchical classifiers across 15 datasets of ageing and longevity-related genes. We conclude that the LHC-PCT algorithm ranks better across several tests (seven out of 12).
In addition, we interpreted the models generated by the PCT algorithm to show how hierarchical classification algorithms can be used to extract biological insights out of the ageing-related datasets that we compiled.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wang:2016:CGS, author = "Dong Wang and Jin-Xing Liu and Ying-Lian Gao and Chun-Hou Zheng and Yong Xu", title = "Characteristic Gene Selection Based on Robust Graph Regularized Non-Negative Matrix Factorization", journal = j-TCBB, volume = "13", number = "6", pages = "1059--1067", month = nov, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2505294", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 13 12:30:49 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Many methods have been considered for gene selection and analysis of gene expression data. Nonetheless, there still exists the considerable space for improving the explicitness and reliability of gene selection. To this end, this paper proposes a novel method named robust graph regularized non-negative matrix factorization for characteristic gene selection using gene expression data, which mainly contains two aspects: Firstly, enforcing $ {L_{21}} $ -norm minimization on error function which is robust to outliers and noises in data points. Secondly, it considers that the samples lie in low-dimensional manifold which embeds in a high-dimensional ambient space, and reveals the data geometric structure embedded in the original data. 
To demonstrate the validity of the proposed method, we apply it to gene expression data sets involving various human normal and tumor tissue samples and the results demonstrate that the method is effective and feasible.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Raposo:2016:CAM, author = "Adriano N. Raposo and Abel J. P. Gomes", title = "Computational {$3$D} Assembling Methods for {DNA}: a Survey", journal = j-TCBB, volume = "13", number = "6", pages = "1068--1085", month = nov, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2510008", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 13 12:30:49 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "DNA encodes the genetic information of most living beings, except viruses that use RNA. Unlike other types of molecules, DNA is not usually described by its atomic structure being instead usually described by its base-pair sequence, i.e., the textual sequence of its subsidiary molecules known as nucleotides adenine A, cytosine C, guanine G, and thymine T. The three-dimensional assembling of DNA molecules based on its base-pair sequence has been, for decades, a topic of interest for many research groups all over the world. In this paper, we survey the major methods found in the literature to assemble and visualize DNA molecules from their base-pair sequences. We divided these methods into three categories: predictive methods, adaptive methods, and thermodynamic methods. Predictive methods aim to predict a conformation of the DNA from its base pair sequence, while the goal of adaptive methods is to assemble DNA base-pairs sequences along previously known conformations, as needed in scenarios such as DNA Monte Carlo simulations. 
Unlike these two geometric methods, thermodynamic methods are energy-based and aim to predict secondary structural motifs of DNA in cases where hydrogen bonds between base pairs might be broken because of temperature changes. We also present the major software tools that implements predictive, adaptive, and thermodynamic methods.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ray:2016:DCS, author = "Sumanta Ray and Sanghamitra Bandyopadhyay", title = "Discovering Condition Specific Topological Pattern Changes in Coexpression Network: an Application to {HIV-1} Progression", journal = j-TCBB, volume = "13", number = "6", pages = "1086--1099", month = nov, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2505300", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 13 12:30:49 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The natural progression of HIV-1 begins with a short acute retroviral syndrome which typically transit to chronic and clinical latency stages and subsequently progresses to a symptomatic, life-threatening immunodeficiency disease known as AIDS. Microarray analysis based on gene coexpression is widely used to investigate the coregulation pattern of a group or cluster of genes in a specific phenotype. Moreover, an investigation on the topological patterns across multiple phenotypes can facilitate the understanding of stage specific infection pattern of HIV-1 virus. Here, we develop a novel framework to identify topological patterns of gene co-expression network and detect changes of modular structure across different stages of HIV progression. This is achieved by comparing the topological and intramodular properties of HIV infection modules. 
To capture the diversity in modular structure, some topological, correlation based, and eigengene based measures are utilized here. We have applied a rank aggregation scheme to rank all the modules to provide a good agreement between these measures. Some novel transcription factors like `FOXO1', `GATA3', `GFI1', `IRF1', `IRF7', `MAX', `STAT1', `STAT3', `XBP1', and `YY1' that merge from the modules show significant change in expression pattern over HIV progression stages. Moreover, we have performed an eigengene based analysis to reveal the perturbation in modular structure across three stages of HIV-1 progression.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Vilor-Tejedor:2016:EPM, author = "Natalia Vilor-Tejedor and Juan R. Gonzalez and M. Luz Calle", title = "Efficient and Powerful Method for Combining {$P$}-Values in Genome-Wide Association Studies", journal = j-TCBB, volume = "13", number = "6", pages = "1100--1106", month = nov, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2509977", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 13 12:30:49 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The goal of Genome-wide Association Studies (GWAS) is the identification of genetic variants, usually single nucleotide polymorphisms (SNPs), that are associated with disease risk. However, SNPs detected so far with GWAS for most common diseases only explain a small proportion of their total heritability. Gene set analysis (GSA) has been proposed as an alternative to single-SNP analysis with the aim of improving the power of genetic association studies. Nevertheless, most GSA methods rely on expensive computational procedures that make infeasible their implementation in GWAS.
We propose a new GSA method, referred as globalEVT, which uses the extreme value theory to derive gene-level p-values. GlobalEVT reduces dramatically the computational requirements compared to other GSA approaches. In addition, this new approach improves the power by allowing different inheritance models for each genetic variant as illustrated in the simulation study performed and allows the existence of correlation between the SNPs. Real data analysis of an Attention-deficit/hyperactivity disorder (ADHD) study illustrates the importance of using GSA approaches for exploring new susceptibility genes. Specifically, the globalEVT method is able to detect genes related to Cyclophilin A like domain proteins which is known to play an important role in the mechanisms of ADHD development.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Cheng:2016:EIS, author = "Xiaoqing Cheng and Tomoya Mori and Yushan Qiu and Wai-Ki Ching and Tatsuya Akutsu", title = "Exact Identification of the Structure of a Probabilistic {Boolean} Network from Samples", journal = j-TCBB, volume = "13", number = "6", pages = "1107--1116", month = nov, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2505310", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 13 12:30:49 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We study the number of samples required to uniquely determine the structure of a probabilistic Boolean network (PBN), where PBNs are probabilistic extensions of Boolean networks. We show via theoretical analysis and computational analysis that the structure of a PBN can be exactly identified with high probability from a relatively small number of samples for interesting classes of PBNs of bounded indegree.
On the other hand, we also show that there exist classes of PBNs for which it is impossible to uniquely determine the structure of a PBN from samples.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Gong:2016:GBN, author = "Maoguo Gong and Zhenglin Peng and Lijia Ma and Jiaxiang Huang", title = "Global Biological Network Alignment by Using Efficient Memetic Algorithm", journal = j-TCBB, volume = "13", number = "6", pages = "1117--1129", month = nov, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2511741", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 13 12:30:49 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "High-throughput experimental screening techniques have resulted in a large number of biological network data such as protein-protein interactions (PPI) data. The analysis of these data can enhance our understanding of cellular processes. PPI network alignment is one of the comparative analysis methods for analyzing biological networks. Research on PPI networks can identify conserved subgraphs and help us to understand evolutionary trajectories across species. Some evolutionary algorithms have been proposed for coping with PPI network alignment, but most of them are limited by the lower search efficiency due to the lack of the priori knowledge. In this paper, we propose a memetic algorithm, denoted as MeAlign, to solve the biological network alignment by optimizing an objective function which introduces topological structure and sequence similarities. MeAlign combines genetic algorithm with a local search refinement. The genetic algorithm is to find interesting alignment solution regions, and the local search is to find optimal solutions around the regions.
The proposed algorithm first develops a coarse similarity score matrix for initialization and then it uses a specific neighborhood heuristic local search strategy to find an optimal alignment. In MeAlign, the information of topological structure and sequence similarities is used to guide our mapping. Experimental results demonstrate that our algorithm can achieve a better mapping than some state-of-the-art algorithms and it makes a better balance between the network topology and nodes sequence similarities.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ray:2016:IAC, author = "Meredith Ray and Jian Kang and Hongmei Zhang", title = "Identifying Activation Centers with Spatial {Cox} Point Processes Using {fMRI} Data", journal = j-TCBB, volume = "13", number = "6", pages = "1130--1141", month = nov, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2510007", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 13 12:30:49 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We developed a Bayesian clustering method to identify significant regions of brain activation. Coordinate-based meta data originating from functional magnetic resonance imaging (fMRI) were of primary interest. Individual fMRI has the ability to measure the intensity of blood flow and oxygen to a location within the brain that was activated by a given thought or emotion. The proposed method performed clustering on two levels, latent foci center and study activation center, with a spatial Cox point process utilizing the Dirichlet process to describe the distribution of foci. Intensity was modeled as a function of distance between the focus and the center of the cluster of foci using a Gaussian kernel.
Simulation studies were conducted to evaluate the sensitivity and robustness of the method with respect to cluster identification and underlying data distributions. We applied the method to a meta data set to identify emotion foci centers.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wang:2016:KBP, author = "Hong Wang and Xicheng Wang and Zheng Li and Keqiu Li", title = "Kriging-Based Parameter Estimation Algorithm for Metabolic Networks Combined with Single-Dimensional Optimization and Dynamic Coordinate Perturbation", journal = j-TCBB, volume = "13", number = "6", pages = "1142--1154", month = nov, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2505291", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 13 12:30:49 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The metabolic network model allows for an in-depth insight into the molecular mechanism of a particular organism. Because most parameters of the metabolic network cannot be directly measured, they must be estimated by using optimization algorithms. However, three characteristics of the metabolic network model, i.e., high nonlinearity, large amount parameters, and huge variation scopes of parameters, restrict the application of many traditional optimization algorithms. As a result, there is a growing demand to develop efficient optimization approaches to address this complex problem. In this paper, a Kriging-based algorithm aiming at parameter estimation is presented for constructing the metabolic networks. 
In the algorithm, a new infill sampling criterion, named expected improvement and mutual information (EI\&MI), is adopted to improve the modeling accuracy by selecting multiple new sample points at each cycle, and the domain decomposition strategy based on the principal component analysis is introduced to save computing time. Meanwhile, the convergence speed is accelerated by combining a single-dimensional optimization method with the dynamic coordinate perturbation strategy when determining the new sample points. Finally, the algorithm is applied to the arachidonic acid metabolic network to estimate its parameters. The obtained results demonstrate the effectiveness of the proposed algorithm in getting precise parameter values under a limited number of iterations.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wei:2016:MDD, author = "Guanyun Wei and Sheng Qin and Wenjuan Li and Liming Chen and Fei Ma", title = "{MDTE DB}: a Database for {MicroRNAs} Derived from Transposable Element", journal = j-TCBB, volume = "13", number = "6", pages = "1155--1160", month = nov, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2511767", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 13 12:30:49 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "MicroRNAs (miRNAs) are crucial regulators of gene expression at post-transcriptional level. Understanding origin and evolution of miRNAs will improve the current available algorithm for the prediction of novel miRNAs and their functions. Transposable elements (TEs) provide a natural mechanism for the origin of new miRNAs. In the paper, 2,583 miRNAs derived from TEs (MDTEs) were collected to construct a database named MDTE database (MDTE DB) for storing, searching, and analyzing MDTEs.
The database provides a convenient source for studying the origin and evolution of miRNAs. Database URL: http://bioinf.njnu.edu.cn/MDTE/MDTE.php.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Cheng:2016:MDL, author = "Shuang Cheng and Maozu Guo and Chunyu Wang and Xiaoyan Liu and Yang Liu and Xuejian Wu", title = "{MiRTDL}: a Deep Learning Approach for {miRNA} Target Prediction", journal = j-TCBB, volume = "13", number = "6", pages = "1161--1169", month = nov, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2510002", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 13 12:30:49 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "MicroRNAs (miRNAs) regulate genes that are associated with various diseases. To better understand miRNAs, the miRNA regulatory mechanism needs to be investigated and the real targets identified. Here, we present miRTDL, a new miRNA target prediction algorithm based on convolutional neural network (CNN). The CNN automatically extracts essential information from the input data rather than completely relying on the input dataset generated artificially when the precise miRNA target mechanisms are poorly known. In this work, the constraint relaxing method is first used to construct a balanced training dataset to avoid inaccurate predictions caused by the existing unbalanced dataset. The miRTDL is then applied to 1,606 experimentally validated miRNA target pairs. Finally, the results show that our miRTDL outperforms the existing target prediction algorithms and achieves significantly higher sensitivity, specificity and accuracy of 88.43, 96.44, and 89.98 percent, respectively.
We also investigate the miRNA target mechanism, and the results show that the complementation features are more important than the others.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Qi:2016:PEP, author = "Yi Qi and Jiawei Luo", title = "Prediction of Essential Proteins Based on Local Interaction Density", journal = j-TCBB, volume = "13", number = "6", pages = "1170--1182", month = nov, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2509989", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 13 12:30:49 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Prediction of essential proteins which is aided by computer science and supported from high throughput data is a more efficient method compared with time consuming and expensive experimental approaches. There are many computational approaches reported, however they are usually sensitive to various network structures so that their robustness are generally poor. In this paper, a novel topological centrality measure for predicting essential proteins based on local interaction density, named as LID, is proposed. It is different from previous measures that LID takes the essentiality of a node from interaction densities among its neighbors through topological analyses of real proteins in a protein complex set first time at the viewpoint of biological modules. LID is applied to four different yeast protein interaction networks, which are obtained, respectively, from the DIP database and the BioGRID database. 
The experimental results show that the number of essential proteins detected by LID universally exceeds or approximates the best performance of other 10 topological centrality measures in all 24 comparisons of four networks: DC, BC, ClusterC, CloseC, MNC, SoECCNC, LAC, SC, EigC, and InfoC. The better robustness of LID for multiple data sets will make it to be a new core topological centrality measure to improve the performance of prediction for more species protein interaction networks.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ancherbak:2016:TDG, author = "Sergiy Ancherbak and Ercan E. Kuruoglu and Martin Vingron", title = "Time-Dependent Gene Network Modelling by Sequential {Monte Carlo}", journal = j-TCBB, volume = "13", number = "6", pages = "1183--1193", month = nov, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2496301", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 13 12:30:49 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Most existing methods used for gene regulatory network modeling are dedicated to inference of steady state networks, which are prevalent over all time instants. However, gene interactions evolve over time. Information about the gene interactions in different stages of the life cycle of a cell or an organism is of high importance for biology. In the statistical graphical models literature, one can find a number of methods for studying steady-state network structures while the study of time varying networks is rather recent. A sequential Monte Carlo method, namely particle filtering (PF), provides a powerful tool for dynamic time series analysis.
In this work, the PF technique is proposed for dynamic network inference and its potentials in time varying gene expression data tracking are demonstrated. The data used for validation are synthetic time series data available from the DREAM4 challenge, generated from known network topologies and obtained from transcriptional regulatory networks of \bioname{S. cerevisiae}. We model the gene interactions over the course of time with multivariate linear regressions where the parameters of the regressive process are changing over time.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Liu:2016:DDS, author = "Yang Liu and Bowen Li and Jungang Lou", title = "Disturbance Decoupling of Singular {Boolean} Control Networks", journal = j-TCBB, volume = "13", number = "6", pages = "1194--1200", month = nov, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2509969", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 13 12:30:49 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This paper investigates the controller designing for disturbance decoupling problem (DDP) of singular Boolean control networks (SBCNs). Using semi-tensor product (STP) of matrices and the Implicit Function Theorem, a SBCN is converted into the standard BCN. Based on the redundant variable separation technique, both state feedback and output feedback controllers are designed to solve the DDP of the SBCN. Sufficient conditions are also given to analyze the invariance of controllers concerning the DDP of the SBCN with function perturbation.
Two illustrative examples are presented to support the effectiveness of these obtained results.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Tan:2017:ESS, author = "Ying Tan and Yuhui Shi", title = "Editorial: Special Section on Bio-Inspired Swarm Computing and Engineering", journal = j-TCBB, volume = "14", number = "1", pages = "1--3", month = jan, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2566438", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Mar 25 07:42:59 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Presents the introductory editorial for this issue of the publication.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Niu:2017:SBA, author = "Ben Niu and Huali Huang and Lijing Tan and Qiqi Duan", title = "Symbiosis-Based Alternative Learning Multi-Swarm Particle Swarm Optimization", journal = j-TCBB, volume = "14", number = "1", pages = "4--14", month = jan, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2459690", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Mar 25 07:42:59 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Inspired by the ideas from the mutual cooperation of symbiosis in natural ecosystem, this paper proposes a new variant of PSO, named Symbiosis-based Alternative Learning Multi-swarm Particle Swarm Optimization SALMPSO. 
A learning probability to select one exemplar out of the center positions, the local best position, and the historical best position including the experience of internal and external multiple swarms, is used to keep the diversity of the population. Two different levels of social interaction within and between multiple swarms are proposed. In the search process, particles not only exchange social experience with others that are from their own sub-swarms, but also are influenced by the experience of particles from other fellow sub-swarms. According to the different exemplars and learning strategy, this model is instantiated as four variants of SALMPSO and a set of 15 test functions are conducted to compare with some variants of PSO including 10, 30 and 50 dimensions, respectively. Experimental results demonstrate that the alternative learning strategy in each SALMPSO version can exhibit better performance in terms of the convergence speed and optimal values on most multimodal functions in our simulation.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Mo:2017:NMB, author = "Hongwei Mo and Lili Liu and Jiao Zhao", title = "A New Magnetotactic Bacteria Optimization Algorithm Based on Moment Migration", journal = j-TCBB, volume = "14", number = "1", pages = "15--26", month = jan, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2453949", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Mar 25 07:42:59 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Magnetotactic bacteria is a kind of polyphyletic group of prokaryotes with the characteristics of magnetotaxis that make them orient and swim along geomagnetic field lines. Its distinct biology characteristics are useful to design new optimization technology. 
In this paper, a new bionic optimization algorithm named Magnetotactic Bacteria Moment Migration Algorithm (MBMMA) is proposed. In the proposed algorithm, the moments of a chain of magnetosomes are considered as solutions. The moments of relative good solutions can migrate each other to enhance the diversity of the MBMMA. It is compared with variants of PSO on standard functions problems. The experiment results show that the MBMMA is effective in solving optimization problems. It shows better or competitive performance compared with the variants of PSO on most of the tested functions in this paper.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zheng:2017:CFF, author = "Shaoqiu Zheng and Junzhi Li and Andreas Janecek and Ying Tan", title = "A Cooperative Framework for Fireworks Algorithm", journal = j-TCBB, volume = "14", number = "1", pages = "27--41", month = jan, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2497227", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Mar 25 07:42:59 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This paper presents a cooperative framework for fireworks algorithm (CoFFWA). A detailed analysis of existing fireworks algorithm (FWA) and its recently developed variants has revealed that (i) the current selection strategy has the drawback that the contribution of the firework with the best fitness (denoted as core firework) overwhelms the contributions of all other fireworks (non-core fireworks) in the explosion operator, (ii) the Gaussian mutation operator is not as effective as it is designed to be.
To overcome these limitations, the CoFFWA is proposed, which significantly improves the exploitation capability by using an independent selection method and also increases the exploration capability by incorporating a crowdness-avoiding cooperative strategy among the fireworks. Experimental results on the CEC2013 benchmark functions indicate that CoFFWA outperforms the state-of-the-art FWA variants, artificial bee colony, differential evolution, and the standard particle swarm optimization (SPSO2007/SPSO2011) in terms of convergence performance.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhang:2017:FAE, author = "Bei Zhang and Yu-Jun Zheng and Min-Xia Zhang and Sheng-Yong Chen", title = "Fireworks Algorithm with Enhanced Fireworks Interaction", journal = j-TCBB, volume = "14", number = "1", pages = "42--55", month = jan, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2446487", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Mar 25 07:42:59 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "As a relatively new metaheuristic in swarm intelligence, fireworks algorithm (FWA) has exhibited promising performance on a wide range of optimization problems. This paper aims to improve FWA by enhancing fireworks interaction in three aspects: (1) Developing a new Gaussian mutation operator to make sparks learn from more exemplars; (2) Integrating the regular explosion operator of FWA with the migration operator of biogeography-based optimization (BBO) to increase information sharing; (3) Adopting a new population selection strategy that enables high-quality solutions to have high probabilities of entering the next generation without incurring high computational cost.
The combination of the three strategies can significantly enhance fireworks interaction and thus improve solution diversity and suppress premature convergence. Numerical experiments on the CEC 2015 single-objective optimization test problems show the effectiveness of the proposed algorithm. The application to a high-speed train scheduling problem also demonstrates its feasibility in real-world optimization problems.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Cheng:2017:EAB, author = "Shan Cheng and Long-Long Zhao and Xiao-Yu Jiang", title = "An Effective Application of Bacteria Quorum Sensing and Circular Elimination in {MOPSO}", journal = j-TCBB, volume = "14", number = "1", pages = "56--63", month = jan, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2446484", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Mar 25 07:42:59 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In this paper, an approach that incorporates a turbulence mechanism and a circular elimination strategy is presented to strengthen the performance of multi-objective particle swarm optimization MOPSO. For convergence enhancement, the turbulence mechanism derived from bacteria quorum sensing behavior is introduced to MOPSO to preserve the swarm diversity. Meanwhile, the circular elimination strategy is used to select particles for next iteration for better distribution of the Pareto-optimal solutions. The improved MOPSO algorithm has been tested on a set of benchmark functions and compared with representative multi-objective optimization algorithms. 
Simulation results illustrate that the algorithm outperforms the other algorithms on convergence while keep good spread performance, and could be used as an effective global optimization tool.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhang:2017:MOP, author = "Yong Zhang and Dun-wei Gong and Jian Cheng", title = "Multi-Objective Particle Swarm Optimization Approach for Cost-Based Feature Selection in Classification", journal = j-TCBB, volume = "14", number = "1", pages = "64--75", month = jan, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2476796", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Mar 25 07:42:59 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Feature selection is an important data-preprocessing technique in classification problems such as bioinformatics and signal processing. Generally, there are some situations where a user is interested in not only maximizing the classification performance but also minimizing the cost that may be associated with features. This kind of problem is called cost-based feature selection. However, most existing feature selection approaches treat this task as a single-objective optimization problem. This paper presents the first study of multi-objective particle swarm optimization PSO for cost-based feature selection problems. The task of this paper is to generate a Pareto front of nondominated solutions, that is, feature subsets, to meet different requirements of decision-makers in real-world applications. 
In order to enhance the search capability of the proposed algorithm, a probability-based encoding technology and an effective hybrid operator, together with the ideas of the crowding distance, the external archive, and the Pareto domination relationship, are applied to PSO. The proposed PSO-based multi-objective feature selection algorithm is compared with several multi-objective feature selection algorithms on five benchmark datasets. Experimental results show that the proposed algorithm can automatically evolve a set of nondominated solutions, and it is a highly competitive feature selection method for solving cost-based feature selection problems.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ni:2017:NCH, author = "Qingjian Ni and Qianqian Pan and Huimin Du and Cen Cao and Yuqing Zhai", title = "A Novel Cluster Head Selection Algorithm Based on Fuzzy Clustering and Particle Swarm Optimization", journal = j-TCBB, volume = "14", number = "1", pages = "76--84", month = jan, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2446475", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Mar 25 07:42:59 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "An important objective of wireless sensor network is to prolong the network life cycle, and topology control is of great significance for extending the network life cycle. Based on previous work, for cluster head selection in hierarchical topology control, we propose a solution based on fuzzy clustering preprocessing and particle swarm optimization. 
More specifically, first, fuzzy clustering algorithm is used to initial clustering for sensor nodes according to geographical locations, where a sensor node belongs to a cluster with a determined probability, and the number of initial clusters is analyzed and discussed. Furthermore, the fitness function is designed considering both the energy consumption and distance factors of wireless sensor network. Finally, the cluster head nodes in hierarchical topology are determined based on the improved particle swarm optimization. Experimental results show that, compared with traditional methods, the proposed method achieved the purpose of reducing the mortality rate of nodes and extending the network life cycle.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Han:2017:GSM, author = "Fei Han and Chun Yang and Ya-Qi Wu and Jian-Sheng Zhu and Qing-Hua Ling and Yu-Qing Song and De-Shuang Huang", title = "A Gene Selection Method for Microarray Data Based on Binary {PSO} Encoding Gene-to-Class Sensitivity Information", journal = j-TCBB, volume = "14", number = "1", pages = "85--96", month = jan, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2465906", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Mar 25 07:42:59 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Traditional gene selection methods for microarray data mainly considered the features' relevance by evaluating their utility for achieving accurate predication or exploiting data variance and distribution, and the selected genes were usually poorly explicable. 
To improve the interpretability of the selected genes as well as prediction accuracy, an improved gene selection method based on binary particle swarm optimization BPSO and prior information is proposed in this paper. In the proposed method, BPSO encoding gene-to-class sensitivity GCS information is used to perform gene selection. The gene-to-class sensitivity information, extracted from the samples by extreme learning machine ELM, is encoded into the selection process in four aspects: initializing particles, updating the particles, modifying maximum velocity, and adopting mutation operation adaptively. Constrained by the gene-to-class sensitivity information, the new method can select functional gene subsets which are significantly sensitive to the samples' classes. With the few discriminative genes selected by the proposed method, ELM, K-nearest neighbor and support vector machine classifiers achieve much high prediction accuracy on five public microarray data, which in turn verifies the efficiency and effectiveness of the proposed gene selection method.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhang:2017:TDP, author = "Bo Zhang and Haibin Duan", title = "Three-Dimensional Path Planning for Uninhabited Combat Aerial Vehicle Based on Predator-Prey Pigeon-Inspired Optimization in Dynamic Environment", journal = j-TCBB, volume = "14", number = "1", pages = "97--107", month = jan, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2443789", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Mar 25 07:42:59 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Three-dimension path planning of uninhabited combat aerial vehicle UCAV is a complicated optimal problem, which mainly focused on optimizing the flight route 
considering the different types of constrains under complex combating environment. A novel predator-prey pigeon-inspired optimization PPPIO is proposed to solve the UCAV three-dimension path planning problem in dynamic environment. Pigeon-inspired optimization PIO is a new bio-inspired optimization algorithm. In this algorithm, map and compass operator model and landmark operator model are used to search the best result of a function. The prey-predator concept is adopted to improve global best properties and enhance the convergence speed. The characteristics of the optimal path are presented in the form of a cost function. The comparative simulation results show that our proposed PPPIO algorithm is more efficient than the basic PIO, particle swarm optimization PSO, and different evolution DE in solving UCAV three-dimensional path planning problems.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Liu:2017:SNH, author = "Yuxin Liu and Chao Gao and Zili Zhang and Yuxiao Lu and Shi Chen and Mingxin Liang and Li Tao", title = "Solving {NP}-Hard Problems with \bioname{Physarum}-Based Ant Colony System", journal = j-TCBB, volume = "14", number = "1", pages = "108--120", month = jan, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2462349", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Mar 25 07:42:59 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "NP-hard problems exist in many real world applications. Ant colony optimization ACO algorithms can provide approximate solutions for those NP-hard problems, but the performance of ACO algorithms is significantly reduced due to premature convergence and weak robustness, etc. 
With these observations in mind, this paper proposes a \bioname{Physarum}-based pheromone matrix optimization strategy in ant colony system ACS for solving NP-hard problems such as traveling salesman problem TSP and 0/1 knapsack problem 0/1 KP. In the \bioname{Physarum}-inspired mathematical model, one of the unique characteristics is that critical tubes can be reserved in the process of network evolution. The optimized updating strategy employs the unique feature and accelerates the positive feedback process in ACS, which contributes to the quick convergence of the optimal solution. Some experiments were conducted using both benchmark and real datasets. The experimental results show that the optimized ACS outperforms other meta-heuristic algorithms in accuracy and robustness for solving TSPs. Meanwhile, the convergence rate and robustness for solving 0/1 KPs are better than those of classical ACS.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Czeizler:2017:GTA, author = "Elena Czeizler and Tommi Hirvola and Kalle Karhu", title = "A Graph-Theoretical Approach for Motif Discovery in Protein Sequences", journal = j-TCBB, volume = "14", number = "1", pages = "121--130", month = jan, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2511750", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Mar 25 07:42:59 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Motif recognition is a challenging problem in bioinformatics due to the diversity of protein motifs. Many existing algorithms identify motifs of a given length, thus being either not applicable or not efficient when searching simultaneously for motifs of various lengths.
Searching for gapped motifs, although very important, is a highly time-consuming task due to the combinatorial explosion of possible combinations implied by the consideration of long gaps. We introduce a new graph theoretical approach to identify motifs of various lengths, both with and without gaps. We compare our approach with two widely used methods: MEME and GLAM2 analyzing both the quality of the results and the required computational time. Our method provides results of a slightly higher level of quality than MEME but at a much faster rate, i.e., one eighth of MEME's query time. By using similarity indexing, we drop the query times down to an average of approximately one sixth of the ones required by GLAM2, while achieving a slightly higher level of quality of the results. More precisely, for sequence collections smaller than 50,000 bytes GLAM2 is 13 times slower, while being at least as fast as our method on larger ones. The source code of our C++ implementation is freely available in GitHub: https://github.com/hirvolt1/debruijn-motif.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Jamil:2017:VIQ, author = "Hasan M. Jamil", title = "A Visual Interface for Querying Heterogeneous Phylogenetic Databases", journal = j-TCBB, volume = "14", number = "1", pages = "131--144", month = jan, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2520943", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Mar 25 07:42:59 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Despite the recent growth in the number of phylogenetic databases, access to these wealth of resources remain largely tool or form-based interface driven.
It is our thesis that the flexibility afforded by declarative query languages may offer the opportunity to access these repositories in a better way, and to use such a language to pose truly powerful queries in unprecedented ways. In this paper, we propose a substantially enhanced closed visual query language, called PhyQL, that can be used to query phylogenetic databases represented in a canonical form. The canonical representation presented helps capture most phylogenetic tree formats in a convenient way, and is used as the storage model for our PhyloBase database for which PhyQL serves as the query language. We have implemented a visual interface for the end users to pose PhyQL queries using visual icons, and drag and drop operations defined over them. Once a query is posed, the interface translates the visual query into a Datalog query for execution over the canonical database. Responses are returned as hyperlinks to phylogenies that can be viewed in several formats using the tree viewers supported by PhyloBase. 
Results cached in PhyQL buffer allows secondary querying on the computed results making it a truly powerful querying architecture.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhang:2017:CPP, author = "Lin Zhang and Hui Liu and Yufei Huang and Xuesong Wang and Yidong Chen and Jia Meng", title = "Cancer Progression Prediction Using Gene Interaction Regularized Elastic Net", journal = j-TCBB, volume = "14", number = "1", pages = "145--154", month = jan, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2511758", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Mar 25 07:42:59 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Different types of genomic aberration may simultaneously contribute to tumorigenesis. To obtain a more accurate prognostic assessment to guide therapeutic regimen choice for cancer patients, the heterogeneous multi-omics data should be integrated harmoniously, which can often be difficult. For this purpose, we propose a Gene Interaction Regularized Elastic Net GIREN model that predicts clinical outcome by integrating multiple data types. GIREN conveniently embraces both gene measurements and gene-gene interaction information under an elastic net formulation, enforcing structure sparsity, and the ``grouping effect'' in solution to select the discriminate features with prognostic value. An iterative gradient descent algorithm is also developed to solve the model with regularized optimization. GIREN was applied to human ovarian cancer and breast cancer datasets obtained from The Cancer Genome Atlas, respectively. 
Result shows that, the proposed GIREN algorithm obtained more accurate and robust performance over competing algorithms LASSO, Elastic Net, and Semi-supervised PCA, with or without average pathway expression features in predicting cancer progression on both two datasets in terms of median area under curve AUC and interquartile range IQR, suggesting a promising direction for more effective integration of gene measurement and gene interaction information.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Hu:2017:ECF, author = "Lun Hu and Keith C. C. Chan", title = "Extracting Coevolutionary Features from Protein Sequences for Predicting Protein--Protein Interactions", journal = j-TCBB, volume = "14", number = "1", pages = "155--166", month = jan, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2520923", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Mar 25 07:42:59 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Knowing the ways proteins interact with each other are crucial to our understanding of the functional mechanisms of proteins. It is for this reason that different approaches have been developed in attempts to predict protein-protein interactions PPIs computationally. Among them, the sequence-based approaches are preferred to the others as they do not require any information about protein properties to perform their tasks. Instead, most sequence-based approaches make use of feature extraction methods to extract features directly from protein sequences so that for each protein sequence, we can construct a feature vector. The feature vectors of every pair of proteins are then concatenated to form two classes of interacting and non-interacting proteins. 
The prediction of whether or not two proteins interact with each other is then formulated as a classification problem. How accurate PPI predictions can be made therefore depends on how good the features are that can be extracted from the protein sequences to allow interacting or non-interacting to be best distinguished. To do so, instead of extracting such features from individual protein sequences independently of the other protein in the same pair, we propose to jointly consider features from both sequences in a protein pair during the feature extraction process through using a novel coevolutionary feature extraction approach called CoFex. Coevolutionary features extracted by CoFex refer to the covariations found at coevolving positions. Based on the presence and absence of these coevolutionary features in the sequences of two proteins, feature vectors can be composed for pairs of proteins rather than individual proteins. The experiment results show that CoFex is a promising feature extraction approach and can improve the performance of PPI prediction.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Gangeh:2017:FSF, author = "Mehrdad J. 
Gangeh and Hadi Zarkoob and Ali Ghodsi", title = "Fast and Scalable Feature Selection for Gene Expression Data Using {Hilbert--Schmidt} Independence Criterion", journal = j-TCBB, volume = "14", number = "1", pages = "167--181", month = jan, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2631164", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Mar 25 07:42:59 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Goal: In computational biology, selecting a small subset of informative genes from microarray data continues to be a challenge due to the presence of thousands of genes. This paper aims at quantifying the dependence between gene expression data and the response variables and to identifying a subset of the most informative genes using a fast and scalable multivariate algorithm. Methods: A novel algorithm for feature selection from gene expression data was developed. The algorithm was based on the Hilbert--Schmidt independence criterion HSIC, and was partly motivated by singular value decomposition SVD. Results: The algorithm is computationally fast and scalable to large datasets. Moreover, it can be applied to problems with any type of response variables including, biclass, multiclass, and continuous response variables. The performance of the proposed algorithm in terms of accuracy, stability of the selected genes, speed, and scalability was evaluated using both synthetic and real-world datasets. The simulation results demonstrated that the proposed algorithm effectively and efficiently extracted stable genes with high predictive capability, in particular for datasets with multiclass response variables. Conclusion/Significance: The proposed method does not require the whole microarray dataset to be stored in memory, and thus can easily be scaled to large datasets. 
This capability is an important attribute in big data analytics, where data can be large and massively distributed.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Hu:2017:GRS, author = "Hailong Hu and Zhong Li and Hongwei Dong and Tianhe Zhou", title = "Graphical Representation and Similarity Analysis of Protein Sequences Based on Fractal Interpolation", journal = j-TCBB, volume = "14", number = "1", pages = "182--192", month = jan, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2511731", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Mar 25 07:42:59 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A new graphical representation of protein sequences is introduced in this paper. Nine main physicochemical properties of amino acids were used to obtain a 2D discrete point set for protein sequences by applying principal component analysis. The fractal method was then employed to interpolate discrete points in constructing a graphical representation of protein sequences. Fractal dimension of the protein curve was used to analyze the similarity of protein sequences by comparing the distance of vectors representing segments of protein sequences. The Jeffrey's and Matusita distance was modified in the similarity comparison of protein sequences with different lengths. Nine different species from Nicotinamide adenine dinucleotide NADH dehydrogenase 5 ND5 protein sequences were tested as an example to demonstrate our method. Finally, a linear correlation and significance analysis was used to compare our results with other graphical representations referring to the ClustalW result. 
To confirm the validity of our method, eight species in NADH dehydrogenase 6 ND6 protein families and twenty-seven species in beta-globin protein families were also analyzed. Experimental results show that the proposed method is effective for the similarity analysis of proteins.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Bonnici:2017:VOS, author = "Vincenzo Bonnici and Rosalba Giugno", title = "On the Variable Ordering in Subgraph Isomorphism Algorithms", journal = j-TCBB, volume = "14", number = "1", pages = "193--203", month = jan, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2515595", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Mar 25 07:42:59 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Graphs are mathematical structures to model several biological data. Applications to analyze them require to apply solutions for the subgraph isomorphism problem, which is NP-complete. Here, we investigate the existing strategies to reduce the subgraph isomorphism algorithm running time with emphasis on the importance of the order with which the graph vertices are taken into account during the search, called variable ordering, and its incidence on the total running time of the algorithms. We focus on two recent solutions, which are based on an effective variable ordering strategy. 
We discuss their comparison both with the variable ordering strategies reviewed in the paper and the other algorithms present in the ICPR2014 contest on graph matching algorithms for pattern search in biological databases.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Sinha:2017:PDV, author = "Arvind Kumar Sinha and Pradeep Singh and Anand Prakash and Dharm Pal and Anuradha Dube and Awanish Kumar", title = "Putative Drug and Vaccine Target Identification in \bioname{Leishmania donovani} Membrane Proteins Using Na{\"\i}ve {Bayes} Probabilistic Classifier", journal = j-TCBB, volume = "14", number = "1", pages = "204--211", month = jan, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2570217", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Mar 25 07:42:59 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Predicting the role of protein is one of the most challenging problems. There are few approaches available for the prediction of role of unknown protein in terms of drug target or vaccine candidate. We propose here Na{\"\i}ve Bayes probabilistic classifier, a promising method for reliable predictions. This method is tested on the proteins identified in our mass spectrometry based membrane proteomics study of \bioname{Leishmania donovani} parasite that causes a fatal disease Visceral Leishmaniasis in humans all around the world. Most of the vaccine/drug targets belonging to membrane proteins are represented as key players in the pathogenesis of \bioname{Leishmania} infection. Analyses of our previous results, using Na{\"\i}ve Bayes probabilistic classifier, indicate that this method predicts the role of unknown/hypothetical protein as drug target/vaccine candidate significantly with higher precision.
We have employed this method in order to provide probabilistic predictions of unknown/hypothetical proteins as targets. This study reports the unknown/hypothetical proteins of \bioname{Leishmania} membrane fraction as a potential drug targets and vaccine candidate which is vital information for this parasite. Future molecular studies and characterization of these potent targets may produce a recombinant therapeutic/prophylactic tool against Visceral Leishmaniasis. These unknown/hypothetical proteins may open a vast research field to be exploited for novel treatment strategies.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wan:2017:TLM, author = "Shibiao Wan and Man-Wai Mak and Sun-Yuan Kung", title = "Transductive Learning for Multi-Label Protein Subchloroplast Localization Prediction", journal = j-TCBB, volume = "14", number = "1", pages = "212--224", month = jan, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2527657", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Mar 25 07:42:59 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Predicting the localization of chloroplast proteins at the sub-subcellular level is an essential yet challenging step to elucidate their functions. Most of the existing subchloroplast localization predictors are limited to predicting single-location proteins and ignore the multi-location chloroplast proteins. While recent studies have led to some multi-location chloroplast predictors, they usually perform poorly. This paper proposes an ensemble transductive learning method to tackle this multi-label classification problem. Specifically, given a protein in a dataset, its composition-based sequence information and profile-based evolutionary information are respectively extracted. 
These two kinds of features are respectively compared with those of other proteins in the dataset. The comparisons lead to two similarity vectors which are weighted-combined to constitute an ensemble feature vector. A transductive learning model based on the least squares and nearest neighbor algorithms is proposed to process the ensemble features. We refer to the resulting predictor as EnTrans-Chlo. Experimental results on a stringent benchmark dataset and a novel dataset demonstrate that EnTrans-Chlo significantly outperforms state-of-the-art predictors and particularly gains more than 4 percent absolute improvement on the overall actual accuracy. For readers' convenience, EnTrans-Chlo is freely available online at http://bioinfo.eie.polyu.edu.hk/EnTransChloServer/.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhang:2017:AIB, author = "Kuize Zhang and Lijun Zhang and Shaoshuai Mou", title = "An Application of Invertibility of {Boolean} Control Networks to the Control of the Mammalian Cell Cycle", journal = j-TCBB, volume = "14", number = "1", pages = "225--229", month = jan, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2515600", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Mar 25 07:42:59 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In Faur{\'e} et al. 2006, the dynamics of the core network regulating the mammalian cell cycle is formulated as a Boolean control network BCN model consisting of nine proteins as state nodes and a tenth protein (protein CycD) as the control input node. In this model, one of the state nodes, protein Cdc20, plays a central role in the separation of sister chromatids.
Hence, if any Cdc20 sequence can be obtained, fully controlling the mammalian cell cycle is feasible. Motivated by this fact, we study whether any Cdc20 sequence can be obtained theoretically. We formulate the foregoing problem as the invertibility of BCNs, that is, whether one can obtain any Cdc20 sequence by designing input i.e., protein CycD sequences. We give an algorithm to verify the invertibility of any BCN, and find that the BCN model for the core network regulating the mammalian cell cycle is not invertible, that is, one cannot obtain any Cdc20 sequence. We further present another algorithm to test whether a finite Cdc20 sequence can be generated by the BCN model, which leads to a series of periodic infinite Cdc20 sequences with alternately active and inactive Cdc20 segments. States of these sequences are alternated between the two attractors in the proposed model, which reproduces correctly how a cell exits the cell cycle to enter the quiescent state, or the opposite.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhang:2017:ENE, author = "Aidong Zhang", title = "Editorial from the New {Editor-in-Chief}", journal = j-TCBB, volume = "14", number = "2", pages = "251--251", month = mar, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2673898", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 5 18:41:07 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Presents the introductory editorial for this issue of the publication.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Yoo:2017:GES, author = "Illhoi Yoo and Amarda Shehu", title = "Guest Editorial for Special Section 
on {BIBM} 2014", journal = j-TCBB, volume = "14", number = "2", pages = "252--253", month = mar, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2567998", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 5 18:41:07 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The papers in this special section were presented at the Eighth Annual IEEE International Conference on Bioinformatics and Biomedicine BIBM 2014 held in Belfast, UK, 2-5, November 2014.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Lee:2017:DPD, author = "En-Shiun Annie Lee and Ho-Yin Antonio Sze-To and Man-Hon Wong and Kwong-Sak Leung and Terrence Chi-Kong Lau and Andrew K. C. Wong", title = "Discovering Protein-{DNA} Binding Cores by Aligned Pattern Clustering", journal = j-TCBB, volume = "14", number = "2", pages = "254--263", month = mar, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2474376", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 5 18:41:07 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Understanding binding cores is of fundamental importance in deciphering Protein-DNA TF-TFBS binding and gene regulation. Limited by expensive experiments, it is promising to discover them with variations directly from sequence data. Although existing computational methods have produced satisfactory results, they are one-to-one mappings with no site-specific information on residue/nucleotide variations, where these variations in binding cores may impact binding specificity. 
This study presents a new representation for modeling binding cores by incorporating variations and an algorithm to discover them from only sequence data. Our algorithm takes protein and DNA sequences from TRANSFAC a Protein-DNA Binding Database as input; discovers from both sets of sequences conserved regions in Aligned Pattern Clusters APCs; associates them as Protein-DNA Co-Occurring APCs; ranks the Protein-DNA Co-Occurring APCs according to their co-occurrence, and among the top ones, finds three-dimensional structures to support each binding core candidate. If successful, candidates are verified as binding cores. Otherwise, homology modeling is applied to their close matches in PDB to attain new chemically feasible binding cores. Our algorithm obtains binding cores with higher precision and much faster runtime $ \geq $ 1,600x than that of its contemporaries, discovering candidates that do not co-occur as one-to-one associated patterns in the raw data. Availability: http://www.pami.uwaterloo.ca/~ealee/files/tcbbPnDna2015/Release.zip.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhang:2017:MVC, author = "Yong Zhang and Xiaohua Hu and Xingpeng Jiang", title = "Multi-View Clustering of Microbiome Samples by Robust Similarity Network Fusion and Spectral Clustering", journal = j-TCBB, volume = "14", number = "2", pages = "264--271", month = mar, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2474387", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 5 18:41:07 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Microbiome datasets are often comprised of different representations or views which provide complementary information, such as genes, functions, and taxonomic assignments. 
Integration of multi-view information for clustering microbiome samples could create a comprehensive view of a given microbiome study. Similarity network fusion SNF can efficiently integrate similarities built from each view of data into a unique network that represents the full spectrum of the underlying data. Based on this method, we develop a Robust Similarity Network Fusion RSNF approach which combines the strength of random forest and the advantage of SNF at data aggregation. The experimental results indicate the strength of the proposed strategy. The method substantially improves the clustering performance significantly comparing to several state-of-the-art methods in several datasets.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Kifer:2017:OAD, author = "Ilona Kifer and Rui M. Branca and Amir Ben-Dor and Linhui Zhai and Ping Xu and Janne Lehti{\"o} and Zohar Yakhini", title = "Optimizing Analytical Depth and Cost Efficiency of {IEF-LC\slash MS} Proteomics", journal = j-TCBB, volume = "14", number = "2", pages = "272--281", month = mar, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2452901", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 5 18:41:07 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "IEF LC-MS/MS is an analytical method that incorporates a two-step sample separation prior to MS identification of proteins. When analyzing complex samples this preparatory separation allows for higher analytical depth and improved quantification accuracy of proteins. However, cost and analysis time are greatly increased as each analyzed IEF fraction is separately profiled using LC-MS/MS.
We propose an approach that selects a subset of IEF fractions for LC-MS/MS analysis that is highly informative in the context of a group of proteins of interest. Specifically, our method allows a significant reduction in cost and instrument time as compared to the standard protocol of running all fractions, with little compromise to coverage. We develop algorithmics to optimize the selection of the IEF fractions on which to run LC-MS/MS. We translate the fraction optimization task to Minimum Set Cover, a well-studied NP-hard problem. We develop heuristic solutions and compare them in terms of effectiveness and running times. We provide examples to demonstrate advantages and limitations of each algorithmic approach. Finally, we test our methodology by applying it to experimental data obtained from IEF LC-MS/MS analysis of yeast and human samples. We demonstrate the benefit of this approach for analyzing complex samples with a focus on different protein sets of interest.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zheng:2017:AOI, author = "Huiru Zheng and Chaoyang Wang and Haiying Wang", title = "Analysis of Organization of the Interactome Using Dominating Sets: a Case Study on Cell Cycle Interaction Networks", journal = j-TCBB, volume = "14", number = "2", pages = "282--289", month = mar, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2459712", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 5 18:41:07 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In this study, a minimum dominating set based approach was developed and implemented as a Cytoscape plugin to identify critical and redundant proteins in a protein interaction network.
We focused on the investigation of the properties associated with critical proteins in the context of the analysis of interaction networks specific to cell cycle in both yeast and human. A total of 132 yeast genes and 129 human proteins have been identified as critical nodes while 950 in yeast and 980 in human have been categorized as redundant nodes. A clear distinction between critical and redundant proteins was observed when examining their topological parameters including betweenness centrality, suggesting a central role of critical proteins in the control of a network. The significant differences in terms of gene coexpression and functional similarity were observed between the two sets of proteins in yeast. Critical proteins were found to be enriched with essential genes in both networks and have a more deleterious effect on the network integrity than their redundant counterparts. Furthermore, we obtained statistically significant enrichments of proteins that govern human diseases including cancer-related and virus-targeted genes in the corresponding set of critical proteins.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Becker:2017:MTL, author = "Matthias Becker and Nadia Magnenat-Thalmann", title = "Muscle Tissue Labeling of Human Lower Limb in Multi-Channel {mDixon MR} Imaging: Concepts and Applications", journal = j-TCBB, volume = "14", number = "2", pages = "290--299", month = mar, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2459679", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 5 18:41:07 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "With increasing resolutions and number of acquisitions, medical imaging more and more requires computer support for interpretation as currently not 
all imaging data is fully used. In our work, we show how multi-channel images can be used for robust air masking and reliable muscle tissue detection in the human lower limb. We exploit additional channels that are usually discarded in clinical routine. We use the common mDixon acquisition protocol for MR imaging. A series of thresholding, morphological, and connectivity operations is used for processing. We demonstrate our fully automated approach on four subjects and present a comparison with manual labeling. We discuss how this work is used for advanced and intuitive visualization, the quantification of tissue types, pose estimation, initialization of further segmentation methods, and how it could be used in clinical environments.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Veltri:2017:IRA, author = "Daniel Veltri and Uday Kamath and Amarda Shehu", title = "Improving Recognition of Antimicrobial Peptides and Target Selectivity through Machine Learning and Genetic Programming", journal = j-TCBB, volume = "14", number = "2", pages = "300--313", month = mar, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2462364", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 5 18:41:07 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Growing bacterial resistance to antibiotics is spurring research on utilizing naturally-occurring antimicrobial peptides AMPs as templates for novel drug design. While experimentalists mainly focus on systematic point mutations to measure the effect on antibacterial activity, the computational community seeks to understand what determines such activity in a machine learning setting. The latter seeks to identify the biological signals or features that govern activity. 
In this paper, we advance research in this direction through a novel method that constructs and selects complex sequence-based features which capture information about distal patterns within a peptide. Comparative analysis with state-of-the-art methods in AMP recognition reveals our method is not only among the top performers, but it also provides transparent summarizations of antibacterial activity at the sequence level. Moreover, this paper demonstrates for the first time the capability not only to recognize that a peptide is an AMP or not but also to predict its target selectivity based on models of activity against only Gram-positive, only Gram-negative, or both types of bacteria. The work described in this paper is a step forward in computational research seeking to facilitate AMP design or modification in the wet laboratory.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Basu:2017:GEI, author = "Mitra Basu and Yi Pan and Jianxin Wang", title = "Guest {Editors} Introduction to the Special Section on {ISBRA 2014}", journal = j-TCBB, volume = "14", number = "2", pages = "314--315", month = mar, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2676859", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 5 18:41:07 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The papers in this special section were presented at the 10th International Symposium on Bioinformatics Research and Applications ISBRA 2014, which was held at Zhangjiajie, China, June 28-30, 2014.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhang:2017:TPI, author = "Fa Zhang and Yu Chen and Fei Ren and Xuan 
Wang and Zhiyong Liu and Xiaohua Wan", title = "A Two-Phase Improved Correlation Method for Automatic Particle Selection in {Cryo-EM}", journal = j-TCBB, volume = "14", number = "2", pages = "316--325", month = mar, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2415787", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 5 18:41:07 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Particle selection from cryo-electron microscopy Cryo-EM images is very important for high-resolution reconstruction of macromolecular structure. The methods of particle selection can be roughly grouped into two classes, template-matching methods and feature-based methods. In general, template-matching methods usually generate better results than feature-based methods. However, the accuracy of template-matching methods is restricted by the noise and low contrast of Cryo-EM images. Moreover, the processing speed of template-matching methods, restricted by the random orientation of particles, further limits their practical applications. In this paper, combining the advantages of feature-based methods and template-matching methods, we present a two-phase improved correlation method for automatic, fast particle selection. In Phase I, we generate a preliminary particle set using rotation-invariant features of particles. In Phase II, we filter the preliminary particle set using a correlation method to reduce the interference of the high noise background and improve the precision of particle selection. We apply several optimization strategies, including a modified adaboost algorithm, Divide and Conquer technique, cascade strategy and graphics processing unit parallel technique, to improve feature recognition ability and reduce processing time. In addition, we developed two correlation score functions for different correlation situations. 
Experimental results on the benchmark of Cryo-EM images show that our method can improve the accuracy and processing speed of particle selection significantly.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Liu:2017:API, author = "Yi Liu and Bin Ma and Kaizhong Zhang and Gilles Lajoie", title = "An Approach for Peptide Identification by {De Novo} Sequencing of Mixture Spectra", journal = j-TCBB, volume = "14", number = "2", pages = "326--336", month = mar, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2407401", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 5 18:41:07 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Mixture spectra occur quite frequently in a typical wet-lab mass spectrometry experiment, which result from the concurrent fragmentation of multiple precursors. The ability to efficiently and confidently identify mixture spectra is essential to alleviate the existent bottleneck of low mass spectra identification rate. However, most of the traditional computational methods are not suitable for interpreting mixture spectra, because they still take the assumption that the acquired spectra come from the fragmentation of a single precursor. In this manuscript, we formulate the mixture spectra de novo sequencing problem mathematically, and propose a dynamic programming algorithm for the problem. Additionally, we use both simulated and real mixture spectra data sets to verify the merits of the proposed algorithm.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Yan:2017:NNP, author = "Yan and Anthony J. 
Kusalik and Fang-Xiang Wu", title = "{NovoExD}: {De} novo Peptide Sequencing for {ETD\slash ECD} Spectra", journal = j-TCBB, volume = "14", number = "2", pages = "337--344", month = mar, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2389813", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 5 18:41:07 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "De novo peptide sequencing using tandem mass spectrometry (MS/MS) data has become a major computational method for sequence identification in recent years. With the development of new instruments and technology, novel computational methods have emerged with enhanced performance. However, there are only a few methods focusing on ECD/ETD spectra, which mainly contain variants of $c$-ions and $z$-ions. Here, a de novo sequencing method for ECD/ETD spectra, NovoExD, is presented. NovoExD applies a new form of spectrum graph with multiple edge types called a GMET, considers multiple peptide tags, and integrates amino acid combination (AAC) and fragment ion charge information. Its performance is compared with another successful de novo sequencing method, pNovo+, which has an option for ECD/ETD spectra.
Experiments conducted on three different datasets show that the average full length peptide identification accuracy of NovoExD is as high as 88.70 percent, and that NovoExD's average accuracy is more than 20 percent greater on all datasets than that of pNovo+.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhu:2017:ISI, author = "Lin Zhu and Su-Ping Deng and Zhu-Hong You and De-Shuang Huang", title = "Identifying Spurious Interactions in the Protein-Protein Interaction Networks Using Local Similarity Preserving Embedding", journal = j-TCBB, volume = "14", number = "2", pages = "345--352", month = mar, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2407393", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 5 18:41:07 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In recent years, a remarkable amount of protein-protein interaction (PPI) data are being available owing to the advance made in experimental high-throughput technologies. However, the experimentally detected PPI data usually contain a large amount of spurious links, which could contaminate the analysis of the biological significance of protein links and lead to incorrect biological discoveries, thereby posing new challenges to both computational and biological scientists. In this paper, we develop a new embedding algorithm called local similarity preserving embedding (LSPE) to rank the interaction possibility of protein links. By going beyond limitations of current geometric embedding methods for network denoising and emphasizing the local information of PPI networks, LSPE can avoid the unstableness of previous methods.
We demonstrate experimental results on benchmark PPI networks and show that LSPE was the overall leader, outperforming the state-of-the-art methods in topological false links elimination problems.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Jiang:2017:MDR, author = "Xingpeng Jiang and Xiaohua Hu and Weiwei Xu", title = "Microbiome Data Representation by Joint Nonnegative Matrix Factorization with {Laplacian} Regularization", journal = j-TCBB, volume = "14", number = "2", pages = "353--359", month = mar, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2440261", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 5 18:41:07 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Microbiome datasets are often comprised of different representations or views which provide complementary information to understand microbial communities, such as metabolic pathways, taxonomic assignments, and gene families. Data integration methods including approaches based on nonnegative matrix factorization (NMF) combine multi-view data to create a comprehensive view of a given microbiome study by integrating multi-view information. In this paper, we proposed a novel variant of NMF which called Laplacian regularized joint non-negative matrix factorization (LJ-NMF) for integrating functional and phylogenetic profiles from HMP. We compare the performance of this method to other variants of NMF.
The experimental results indicate that the proposed method offers an efficient framework for microbiome data analysis.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Peng:2017:PPF, author = "Wei Peng and Min Li and Lu Chen and Lusheng Wang", title = "Predicting Protein Functions by Using Unbalanced Random Walk Algorithm on Three Biological Networks", journal = j-TCBB, volume = "14", number = "2", pages = "360--369", month = mar, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2394314", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 5 18:41:07 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "With the gap between the sequence data and their functional annotations becomes increasing wider, many computational methods have been proposed to annotate functions for unknown proteins. However, designing effective methods to make good use of various biological resources is still a big challenge for researchers due to function diversity of proteins. In this work, we propose a new method named ThrRW, which takes several steps of random walking on three different biological networks: protein interaction network (PIN), domain co-occurrence network (DCN), and functional interrelationship network (FIN), respectively, so as to infer functional information from neighbors in the corresponding networks. With respect to the topological and structural differences of the three networks, the number of walking steps in the three networks will be different. In the course of working, the functional information will be transferred from one network to another according to the associations between the nodes in different networks. The results of experiment on S.
cerevisiae data show that our method achieves better prediction performance not only than the methods that consider both PIN data and GO term similarities, but also than the methods using both PIN data and protein domain information, which verifies the effectiveness of our method on integrating multiple biological data sources.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Li:2017:UCC, author = "Min Li and Yu Lu and Zhibei Niu and Fang-Xiang Wu", title = "United Complex Centrality for Identification of Essential Proteins from {PPI} Networks", journal = j-TCBB, volume = "14", number = "2", pages = "370--380", month = mar, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2394487", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 5 18:41:07 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Essential proteins are indispensable for the survival or reproduction of an organism. Identification of essential proteins is not only necessary for the understanding of the minimal requirements for cellular life, but also important for the disease study and drug design. With the development of high-throughput techniques, a large number of protein-protein interaction data are available, which promotes the studies of essential proteins from the network level. Up to now, though a series of computational methods have been proposed, the prediction precision still needs to be improved. In this paper, we propose a new method, United complex Centrality (UC), to identify essential proteins by integrating the protein complexes with the topological features of protein-protein interaction (PPI) networks. By analyzing the relationship between the essential proteins and the known protein complexes of S.
cerevisiae and human, we find that the proteins in complexes are more likely to be essential compared with the proteins not included in any complexes and the proteins appeared in multiple complexes are more inclined to be essential compared to those only appeared in a single complex. Considering that some protein complexes generated by computational methods are inaccurate, we also provide a modified version of UC with parameter alpha, named UC-P. The experimental results show that protein complex information can help identify the essential proteins more accurate both for the PPI network of S. cerevisiae and that of human. The proposed method UC performs obviously better than the eight previously proposed methods DC, IC, EC, SC, BC, CC, NC, and LAC for identifying essential proteins.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Mahjani:2017:FCF, author = "Behrang Mahjani and Salman Toor and Carl Nettelblad and Sverker Holmgren", title = "A Flexible Computational Framework Using {$R$} and Map-Reduce for Permutation Tests of Massive Genetic Analysis of Complex Traits", journal = j-TCBB, volume = "14", number = "2", pages = "381--392", month = mar, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2527639", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 5 18:41:07 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/s-plus.bib; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In quantitative trait locus QTL mapping significance of putative QTL is often determined using permutation testing. The computational needs to calculate the significance level are immense, $ 10^4 $ up to $ 10^8 $ or even more permutations can be needed. 
We have previously introduced the PruneDIRECT algorithm for multiple QTL scan with epistatic interactions. This algorithm has specific strengths for permutation testing. Here, we present a flexible, parallel computing framework for identifying multiple interacting QTL using the PruneDIRECT algorithm which uses the map-reduce model as implemented in Hadoop. The framework is implemented in R, a widely used software tool among geneticists. This enables users to rearrange algorithmic steps to adapt genetic models, search algorithms, and parallelization steps to their needs in a flexible way. Our work underlines the maturity of accessing distributed parallel computing for computationally demanding bioinformatics applications through building workflows within existing scientific environments. We investigate the PruneDIRECT algorithm, comparing its performance to exhaustive search and DIRECT algorithm using our framework on a public cloud resource. We find that PruneDIRECT is vastly superior for permutation testing, and perform $ 2 \times 10^5 $ permutations for a 2D QTL problem in $ 15 $ hours, using $ 100 $ cloud processes. We show that our framework scales out almost linearly for a 3D QTL search.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Nguyen:2017:NSC, author = "Van-Nui Nguyen and Kai-Yao Huang and Chien-Hsun Huang and K. 
Robert Lai and Tzong-Yi Lee", title = "A New Scheme to Characterize and Identify Protein Ubiquitination Sites", journal = j-TCBB, volume = "14", number = "2", pages = "393--403", month = mar, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2520939", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 5 18:41:07 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Protein ubiquitination, involving the conjugation of ubiquitin on lysine residue, serves as an important modulator of many cellular functions in eukaryotes. Recent advancements in proteomic technology have stimulated increasing interest in identifying ubiquitination sites. However, most computational tools for predicting ubiquitination sites are focused on small-scale data. With an increasing number of experimentally verified ubiquitination sites, we were motivated to design a predictive model for identifying lysine ubiquitination sites for large-scale proteome dataset. This work assessed not only single features, such as amino acid composition (AAC), amino acid pair composition (AAPC) and evolutionary information, but also the effectiveness of incorporating two or more features into a hybrid approach to model construction. The support vector machine (SVM) was applied to generate the prediction models for ubiquitination site identification. Evaluation by five-fold cross-validation showed that the SVM models learned from the combination of hybrid features delivered a better prediction performance. Additionally, a motif discovery tool, MDDLogo, was adopted to characterize the potential substrate motifs of ubiquitination sites. The SVM models integrating the MDDLogo-identified substrate motifs could yield an average accuracy of 68.70 percent.
Furthermore, the independent testing result showed that the MDDLogo-clustered SVM models could provide a promising accuracy 78.50 percent and perform better than other prediction tools. Two cases have demonstrated the effective prediction of ubiquitination sites with corresponding substrate motifs.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Matsieva:2017:RSF, author = "Julia Matsieva and Steven Kelk and Celine Scornavacca and Chris Whidden and Dan Gusfield", title = "A Resolution of the Static Formulation Question for the Problem of Computing the History Bound", journal = j-TCBB, volume = "14", number = "2", pages = "404--417", month = mar, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2527645", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 5 18:41:07 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Evolutionary data has been traditionally modeled via phylogenetic trees; however, branching alone cannot model conflicting phylogenetic signals, so networks are used instead. Ancestral recombination graphs (ARGs) are used to model the evolution of incompatible sets of SNP data, allowing each site to mutate only once. The model often aims to minimize the number of recombinations. Similarly, incompatible cluster data can be represented by a reticulation network that minimizes reticulation events. The ARG literature has traditionally been disjoint from the reticulation network literature. By building on results from the reticulation network literature, we resolve an open question of interest to the ARG community.
We explicitly prove that the History Bound, a lower bound on the number of recombinations in an ARG for a binary matrix, which was previously only defined procedurally, is equal to the minimum number of reticulation nodes in a network for the corresponding cluster data. To facilitate the proof, we give an algorithm that constructs this network using intermediate values from the procedural History Bound definition. We then develop a top-down algorithm for computing the History Bound, which has the same worst-case runtime as the known dynamic program, and show that it is likely to run faster in typical cases.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Rajaraman:2017:ACR, author = "Ashok Rajaraman and Joao Paulo Pereira Zanetti and Jan Manuch and Cedric Chauve", title = "Algorithms and Complexity Results for Genome Mapping Problems", journal = j-TCBB, volume = "14", number = "2", pages = "418--430", month = mar, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2528239", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 5 18:41:07 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Genome mapping algorithms aim at computing an ordering of a set of genomic markers based on local ordering information such as adjacencies and intervals of markers. In most genome mapping models, markers are assumed to occur uniquely in the resulting map. We introduce algorithmic questions that consider repeats, i.e., markers that can have several occurrences in the resulting map. 
We show that, provided with an upper bound on the copy number of repeated markers and with intervals that span full repeat copies, called repeat spanning intervals, the problem of deciding if a set of adjacencies and repeat spanning intervals admits a genome representation is tractable if the target genome can contain linear and/or circular chromosomal fragments. We also show that extracting a maximum cardinality or weight subset of repeat spanning intervals given a set of adjacencies that admits a genome realization is NP-hard but fixed-parameter tractable in the maximum copy number and the number of adjacent repeats, and tractable if intervals contain a single repeated marker.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Alden:2017:EAS, author = "Kieran Alden and Jon Timmis and Paul S. Andrews and Henrique Veiga-Fernandes and Mark Coles", title = "Extending and Applying {Spartan} to Perform Temporal Sensitivity Analyses for Predicting Changes in Influential Biological Pathways in Computational Models", journal = j-TCBB, volume = "14", number = "2", pages = "431--442", month = mar, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2527654", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 5 18:41:07 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Through integrating real time imaging, computational modelling, and statistical analysis approaches, previous work has suggested that the induction of and response to cell adhesion factors is the key initiating pathway in early lymphoid tissue development, in contrast to the previously accepted view that the process is triggered by chemokine mediated cell recruitment. 
These model derived hypotheses were developed using spartan, an open-source sensitivity analysis toolkit designed to establish and understand the relationship between a computational model and the biological system that model captures. Here, we extend the functionality available in spartan to permit the production of statistical analyses that contrast the behavior exhibited by a computational model at various simulated time-points, enabling a temporal analysis that could suggest whether the influence of biological mechanisms changes over time. We exemplify this extended functionality by using the computational model of lymphoid tissue development as a time-lapse tool. By generating results at twelve-hour intervals, we show how the extensions to spartan have been used to suggest that lymphoid tissue development could be biphasic, and predict the time-point when a switch in the influence of biological mechanisms might occur.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Dias:2017:GWS, author = "Oscar Dias and Daniel Gomes and Paulo Vilaca and Joao Cardoso and Miguel Rocha and Eugenio C. Ferreira and Isabel Rocha", title = "Genome-Wide Semi-Automated Annotation of Transporter Systems", journal = j-TCBB, volume = "14", number = "2", pages = "443--456", month = mar, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2527647", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 5 18:41:07 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Usually, transport reactions are added to genome-scale metabolic models (GSMMs) based on experimental data and literature. This approach does not allow associating specific genes with transport reactions, which impairs the ability of the model to predict effects of gene deletions.
Novel methods for systematic genome-wide transporter functional annotation and their integration into GSMMs are therefore necessary. In this work, an automatic system to detect and classify all potential membrane transport proteins for a given genome and integrate the related reactions into GSMMs is proposed, based on the identification and classification of genes that encode transmembrane proteins. The Transport Reactions Annotation and Generation TRIAGE tool identifies the metabolites transported by each transmembrane protein and its transporter family. The localization of the carriers is also predicted and, consequently, their action is confined to a given membrane. The integration of the data provided by TRIAGE with highly curated models allowed the identification of new transport reactions. TRIAGE is included in the new release of merlin, a software tool previously developed by the authors, which expedites the GSMM reconstruction processes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Mkrtchyan:2017:OLS, author = "Katya Mkrtchyan and Anirban Chakraborty and Amit K. Roy-Chowdhury", title = "Optimal Landmark Selection for Registration of {$4$D} Confocal Image Stacks in \bioname{Arabidopsis}", journal = j-TCBB, volume = "14", number = "2", pages = "457--467", month = mar, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2527655", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 5 18:41:07 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Technologically advanced imaging techniques have allowed us to generate and study the internal part of a tissue over time by capturing serial optical images that contain spatio-temporal slices of hundreds of tightly packed cells. 
Image registration of such live-imaging datasets of developing multicellular tissues is one of the essential components of all image analysis pipelines. In this paper, we present a fully automated 4D (X-Y-Z-T) registration method of live imaging stacks that takes care of both temporal and spatial misalignments. We present a novel landmark selection methodology where the shape features of individual cells are not of high quality and highly distinguishable. The proposed registration method finds the best image slice correspondence from consecutive image stacks to account for vertical growth in the tissue and the discrepancy in the choice of the starting focal point. Then, it uses local graph-based approach to automatically find corresponding landmark pairs, and finally the registration parameters are used to register the entire image stack. The proposed registration algorithm combined with an existing tracking method is tested on multiple image stacks of tightly packed cells of Arabidopsis shoot apical meristem and the results show that it significantly improves the accuracy of cell lineages and division statistics.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Hayamizu:2017:CMS, author = "Momoko Hayamizu and Hiroshi Endo and Kenji Fukumizu", title = "A Characterization of Minimum Spanning Tree-Like Metric Spaces", journal = j-TCBB, volume = "14", number = "2", pages = "468--471", month = mar, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2550431", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 5 18:41:07 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Recent years have witnessed a surge of biological interest in the minimum spanning tree (MST) problem for its relevance to automatic model construction
using the distances between data points. Despite the increasing use of MST algorithms for this purpose, the goodness-of-fit of an MST to the data is often elusive because no quantitative criteria have been developed to measure it. Motivated by this, we provide a necessary and sufficient condition to ensure that a metric space on $n$ points can be represented by a fully labeled tree on $n$ vertices, and thereby determine when an MST preserves all pairwise distances between points in a finite metric space.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Boes:2017:LBN, author = "Olivier Boes and Mareike Fischer and Steven Kelk", title = "A Linear Bound on the Number of States in Optimal Convex Characters for Maximum Parsimony Distance", journal = j-TCBB, volume = "14", number = "2", pages = "472--477", month = mar, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2543727", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 5 18:41:07 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Given two phylogenetic trees on the same set of taxa $X$, the maximum parsimony distance $ d_\mathrm {MP}$ is defined as the maximum, ranging over all characters $ \chi $ on $X$, of the absolute difference in parsimony score induced by $ \chi $ on the two trees. In this note, we prove that for binary trees there exists a character achieving this maximum that is convex on one of the trees (i.e., the parsimony score induced on that tree is equal to the number of states in the character minus 1) and such that the number of states in the character is at most $ 7 d_\mathrm {MP} - 5$. This is the first non-trivial bound on the number of states required by optimal characters, convex or otherwise.
The result potentially has algorithmic significance because, unlike general characters, convex characters with a bounded number of states can be enumerated in polynomial time.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Nguyen:2017:BAR, author = "Thao Thi Phuong Nguyen and Vinh Sy Le and Hai Bich Ho and Quang Si Le", title = "Building Ancestral Recombination Graphs for Whole Genomes", journal = j-TCBB, volume = "14", number = "2", pages = "478--483", month = mar, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2542801", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 5 18:41:07 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We propose a heuristic algorithm, called ARG4WG, to build plausible ancestral recombination graphs (ARGs) from thousands of whole genome samples.
By using the longest shared end for recombination inference, ARG4WG constructs ARGs with small numbers of recombination events that perform well in association mapping on genome-wide association studies.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Athanasiadis:2017:DMR, author = "Emmanouil Athanasiadis and Marilena Bourdakou and George Spyrou", title = "{D-Map}: Random Walking on Gene Network Inference Maps Towards differential Avenue Discovery", journal = j-TCBB, volume = "14", number = "2", pages = "484--490", month = mar, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2535267", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 5 18:41:07 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Differential rewiring of cellular interaction networks between disease and healthy state is of great importance. Through a systems level approach, malfunctioned mechanisms that are absent in the normal cases, may enlighten the key-players in terms of genes and their interaction chains related to disease. We have developed D-Map, a publicly available user-friendly web application, capable of generating and manipulating advanced differential networks by combining state-of-the-art inference reconstruction methods with random walk simulations. The inputs are expression profiles obtained from the Gene Expression Omnibus and a gene list under investigation. Differential networks may be visualized and interpreted through the use of D-Map interface, where display of the disease, the normal and the common state can be performed, interactively. 
A case study scenario concerning Alzheimer's disease, as well as breast, lung, and bladder cancer was conducted in order to demonstrate the usefulness of the proposed methodology to different disease types. Findings were consistent with the current bibliography, and the provided interaction lists may be further explored towards novel biological insights of the investigated diseases. The DMap web-application is available at: http://bioserver-3.bioacademy.gr/Bioserver/DMap/index.php.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Mottelet:2017:MFA, author = "Stephane Mottelet and Gil Gaullier and Georges Sadaka", title = "Metabolic Flux Analysis in Isotope Labeling Experiments Using the Adjoint Approach", journal = j-TCBB, volume = "14", number = "2", pages = "491--497", month = mar, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2544299", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 5 18:41:07 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Comprehension of metabolic pathways is considerably enhanced by metabolic flux analysis MFA-ILE in isotope labeling experiments. The balance equations are given by hundreds of algebraic stationary MFA or ordinary differential equations nonstationary MFA, and reducing the number of operations is therefore a crucial part of reducing the computation cost. The main bottleneck for deterministic algorithms is the computation of derivatives, particularly for nonstationary MFA. In this article, we explain how the overall identification process may be speeded up by using the adjoint approach to compute the gradient of the residual sum of squares. 
The proposed approach shows significant improvements in terms of complexity and computation time when it is compared with the usual direct approach. Numerical results are obtained for the central metabolic pathways of Escherichia coli and are validated against reference software in the stationary case. The methods and algorithms described in this paper are included in the sysmetab software package distributed under an Open Source license at http://forge.scilab.org/index.php/p/sysmetab/.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Goldweber:2017:PGB, author = "Scott Goldweber and Jamal Theodore and John Torcivia-Rodriguez and Vahan Simonyan and Raja Mazumder", title = "{Pubcast} and {Genecast}: Browsing and Exploring Publications and Associated Curated Content in Biology Through Mobile Devices", journal = j-TCBB, volume = "14", number = "2", pages = "498--500", month = mar, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2542802", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Jun 5 18:41:07 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Services such as Facebook, Amazon, and eBay were once solely accessed from stationary computers. These web services are now being used increasingly on mobile devices. We acknowledge this new reality by providing users a way to access publications and a curated cancer mutation database on their mobile device with daily automated updates. Availability: http://hive.biochemistry.gwu.edu/tools/HivePubcast.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wang:2017:GES, author = "Fei Wang and Xiao-Li Li and Jason T. L. 
Wang and See-Kiong Ng", title = "Guest Editorial: Special Section on Biological Data Mining and Its Applications in Healthcare", journal = j-TCBB, volume = "14", number = "3", pages = "501--502", month = may, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2612558", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Biologists are stepping up their efforts in understanding the biological processes that underlie disease pathways in the clinical contexts. This has resulted in a flood of biological and clinical data---genomic sequences, DNA microarrays, protein interactions, biomedical images, disease pathways, etc. The rapid adoption of Electronic Health Records (EHRs) across healthcare systems, coupled with the capability of linking EHRs to research biorepositories, provides a unique opportunity for conducting large-scale Precision Medicine research. As a result, data mining techniques, for knowledge discovery and deriving data driven insights from various data sources, are increasingly important in modern biology and healthcare.
The purpose of this special section is to bring together the researchers in bioinformatics, healthcare informatics, and data mining to share about their current research, and their visions on future directions.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wang:2017:PSP, author = "Hua Wang and Lin Yan and Heng Huang and Chris Ding", title = "From Protein Sequence to Protein Function via Multi-Label Linear Discriminant Analysis", journal = j-TCBB, volume = "14", number = "3", pages = "503--513", month = may, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2591529", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Sequence describes the primary structure of a protein, which contains important structural, characteristic, and genetic information and thereby motivates many sequence-based computational approaches to infer protein function. Among them, feature-base approaches attract increased attention because they make prediction from a set of transformed and more biologically meaningful sequence features. However, original features extracted from sequence are usually of high dimensionality and often compromised by irrelevant patterns, therefore dimension reduction is necessary prior to classification for efficient and effective protein function prediction. A protein usually performs several different functions within an organism, which makes protein function prediction a multi-label classification problem. In machine learning, multi-label classification deals with problems where each object may belong to more than one class. 
As a well-known feature reduction method, linear discriminant analysis (LDA) has been successfully applied in many practical applications. It, however, by nature is designed for single-label classification, in which each object can belong to exactly one class. Because directly applying LDA in multi-label classification causes ambiguity when computing scatters matrices, we apply a new Multi-label Linear Discriminant Analysis (MLDA) approach to address this problem and meanwhile preserve powerful classification capability inherited from classical LDA. We further extend MLDA by $ \ell_1$-normalization to overcome the problem of over-counting data points with multiple labels. In addition, we incorporate biological network data using Laplacian embedding into our method, and assess the reliability of predicted putative functions. Extensive empirical evaluations demonstrate promising results of our methods.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Li:2017:TUG, author = "Jianqiang Li and Fei Wang", title = "Towards Unsupervised Gene Selection: a Matrix Factorization Framework", journal = j-TCBB, volume = "14", number = "3", pages = "514--521", month = may, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2591545", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The recent development of microarray gene expression techniques have made it possible to offer phenotype classification of many diseases. However, in gene expression data analysis, each sample is represented by quite a large number of genes, and many of them are redundant or insignificant to clarify the disease problem.
Therefore, how to efficiently select the most useful genes has been becoming one of the most hot research topics in the gene expression data analysis. In this paper, a novel unsupervised two-stage coarse-fine gene selection method is proposed. In the first stage, we apply the kmeans algorithm to over-cluster the genes and discard some redundant genes. In the second stage, we select the most representative genes from the remaining ones based on matrix factorization. Finally the experimental results on several data sets are presented to show the effectiveness of our method.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ju:2017:EAC, author = "Chelsea J. -T. Ju and Zhuangtian Zhao and Wei Wang", title = "Efficient Approach to Correct Read Alignment for Pseudogene Abundance Estimates", journal = j-TCBB, volume = "14", number = "3", pages = "522--533", month = may, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2591533", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "RNA-Sequencing has been the leading technology to quantify expression of thousands of genes simultaneously. The data analysis of an RNA-Seq experiment starts from aligning short reads to the reference genome/transcriptome or reconstructed transcriptome. However, current aligners lack the sensitivity to distinguish reads that come from homologous regions of a genome. One group of these homologies is the paralog pseudogenes. Pseudogenes arise from duplication of a set of protein coding genes, and have been considered as degraded paralogs in the genome due to their loss of functionality.
Recent studies have provided evidence to support their novel regulatory roles in biological processes. With the growing interests in quantifying the expression level of pseudogenes at different tissues or cell lines, it is critical to have a sensitive method that can correctly align ambiguous reads and accurately estimate the expression level among homologous genes. Previously in PseudoLasso, we proposed a linear regression approach to learn read alignment behaviors, and to leverage this knowledge for abundance estimation and alignment correction. In this paper, we extend the work of PseudoLasso by grouping the homologous genomic regions into different communities using a community detection algorithm, followed by building a linear regression model separately for each community. The results show that this approach is able to retain the same accuracy as PseudoLasso. By breaking the genome into smaller homologous communities, the running time is improved from quadratic growth to linear with respect to the number of genes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Walker:2017:ATS, author = "Peter B. Walker and Jacob N. Norris and Anna E. Tschiffely and Melissa L. Mehalick and Craig A. Cunningham and Ian N. 
Davidson", title = "Applications of Transductive Spectral Clustering Methods in a Military Medical Concussion Database", journal = j-TCBB, volume = "14", number = "3", pages = "534--544", month = may, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2591549", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Traumatic brain injury (TBI) is one of the most common forms of neurotrauma that has affected more than 250,000 military service members over the last decade alone. While in battle, service members who experience TBI are at significant risk for the development of normal TBI symptoms, as well as risk for the development of psychological disorders such as Post-Traumatic Stress Disorder (PTSD). As such, these service members often require intense bouts of medication and therapy in order to resume full return-to-duty status. The primary aim of this study is to identify the relationship between the administration of specific medications and reductions in symptomology such as headaches, dizziness, or light-headedness. Service members diagnosed with mTBI and seen at the Concussion Restoration Care Center (CRCC) in Afghanistan were analyzed according to prescribed medications and symptomology. Here, we demonstrate that in such situations with sparse labels and small feature sets, classic analytic techniques such as logistic regression, support vector machines, na{\"\i}ve Bayes, random forest, decision trees, and k-nearest neighbor are not well suited for the prediction of outcomes.
We attribute our findings to several issues inherent to this problem setting and discuss several advantages of spectral graph methods.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Stojanovic:2017:MHQ, author = "Jelena Stojanovic and Djordje Gligorijevic and Vladan Radosavljevic and Nemanja Djuric and Mihajlo Grbovic and Zoran Obradovic", title = "Modeling Healthcare Quality via Compact Representations of Electronic Health Records", journal = j-TCBB, volume = "14", number = "3", pages = "545--554", month = may, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2591523", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Increased availability of Electronic Health Record (EHR) data provides unique opportunities for improving the quality of health services. In this study, we couple EHRs with the advanced machine learning tools to predict three important parameters of healthcare quality. More specifically, we describe how to learn low-dimensional vector representations of patient conditions and clinical procedures in an unsupervised manner, and generate feature vectors of hospitalized patients useful for predicting their length of stay, total incurred charges, and mortality rates. In order to learn vector representations, we propose to employ state-of-the-art language models specifically designed for modeling co-occurrence of diseases and applied clinical procedures. The proposed model is trained on a large-scale EHR database comprising more than 35 million hospitalizations in California over a period of nine years.
We compared the proposed approach to several alternatives and evaluated their effectiveness by measuring accuracy of regression and classification models used for three predictive tasks considered in this study. Our model outperformed the baseline models on all tasks, indicating a strong potential of the proposed approach for advancing quality of the healthcare system.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Moskovitch:2017:PCO, author = "Robert Moskovitch and Hyunmi Choi and George Hripcsak and Nicholas Tatonetti", title = "Prognosis of Clinical Outcomes with Temporal Patterns and Experiences with One Class Feature Selection", journal = j-TCBB, volume = "14", number = "3", pages = "555--563", month = may, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2591539", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Accurate prognosis of outcome events, such as clinical procedures or disease diagnosis, is central in medicine. The emergence of longitudinal clinical data, like the Electronic Health Records (EHR), represents an opportunity to develop automated methods for predicting patient outcomes. However, these data are highly dimensional and very sparse, complicating the application of predictive modeling techniques. Further, their temporal nature is not fully exploited by current methods, and temporal abstraction was recently used which results in symbolic time intervals representation. We present Maitreya, a framework for the prediction of outcome events that leverages these symbolic time intervals.
Using Maitreya, we learn predictive models based on the temporal patterns in the clinical records that are prognostic markers and use these markers to train predictive models for eight clinical procedures. In order to decrease the number of patterns that are used as features, we propose the use of three one class feature selection methods. We evaluate the performance of Maitreya under several parameter settings, including the one-class feature selection, and compare our results to that of atemporal approaches. In general, we found that the use of temporal patterns outperformed the atemporal methods, when representing the number of pattern occurrences.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wang:2017:BCO, author = "Xin Wang and Jinbo Bi", title = "Bi-convex Optimization to Learn Classifiers from Multiple Biomedical Annotations", journal = j-TCBB, volume = "14", number = "3", pages = "564--575", month = may, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2576457", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The problem of constructing classifiers from multiple annotators who provide inconsistent training labels is important and occurs in many application domains. Many existing methods focus on the understanding and learning of the crowd behaviors. Several probabilistic algorithms consider the construction of classifiers for specific tasks using consensus of multiple labelers' annotations. These methods impose a prior on the consensus and develop an expectation-maximization algorithm based on logistic regression loss. We extend the discussion to the hinge loss commonly used by support vector machines.
Our formulations form bi-convex programs that construct classifiers and estimate the reliability of each labeler simultaneously. Each labeler is associated with a reliability parameter, which can be a constant, or class-dependent, or varies for different examples. The hinge loss is modified by replacing the true labels by the weighted combination of labelers' labels with reliabilities as weights. Statistical justification is discussed to motivate the use of linear combination of labels. In parallel to the expectation-maximization algorithm for logistic-based methods, efficient alternating algorithms are developed to solve the proposed bi-convex programs. Experimental results on benchmark datasets and three real-world biomedical problems demonstrate that the proposed methods either outperform or are competitive to the state of the art.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Harrison:2017:GEI, author = "Robert W. Harrison and Ion I. Mandoiu and Alexander Zelikovsky", title = "Guest Editors' Introduction to the Special Section on Bioinformatics Research and Applications", journal = j-TCBB, volume = "14", number = "3", pages = "576--577", month = may, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2673738", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The papers from this special section were presented at the 11th International Symposium on Bioinformatics Research and Application (ISBRA), which was held at Old Dominion University in Norfolk, VA on May 7--10, 2015.
The ISBRA symposium provides a forum for the exchange of ideas and results among researchers, developers, and practitioners working on all aspects of bioinformatics and computational biology and their applications.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Biswas:2017:ECM, author = "Abhishek Biswas and Desh Ranjan and Mohammad Zubair and Stephanie Zeil and Kamal {Al Nasr} and Jing He", title = "An Effective Computational Method Incorporating Multiple Secondary Structure Predictions in Topology Determination for Cryo-{EM} Images", journal = j-TCBB, volume = "14", number = "3", pages = "578--586", month = may, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2543721", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A key idea in de novo modeling of a medium-resolution density image obtained from cryo-electron microscopy is to compute the optimal mapping between the secondary structure traces observed in the density image and those predicted on the protein sequence. When secondary structures are not determined precisely, either from the image or from the amino acid sequence of the protein, the computational problem becomes more complex. We present an efficient method that addresses the secondary structure placement problem in presence of multiple secondary structure predictions and computes the optimal mapping. We tested the method using 12 simulated images from $ \alpha $-proteins and two Cryo-EM images of $ \alpha $--$ \beta $ proteins. We observed that the rank of the true topologies is consistently improved by using multiple secondary structure predictions instead of a single prediction. 
The results show that the algorithm is robust and works well even when errors/misses in the predicted secondary structures are present in the image or the sequence. The results also show that the algorithm is efficient and is able to handle proteins with as many as 33 helices.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Kordi:2017:CDT, author = "Misagh Kordi and Mukul S. Bansal", title = "On the Complexity of Duplication-Transfer-Loss Reconciliation with Non-Binary Gene Trees", journal = j-TCBB, volume = "14", number = "3", pages = "587--599", month = may, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2511761", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Duplication-Transfer-Loss (DTL) reconciliation has emerged as a powerful technique for studying gene family evolution in the presence of horizontal gene transfer. DTL reconciliation takes as input a gene family phylogeny and the corresponding species phylogeny, and reconciles the two by postulating speciation, gene duplication, horizontal gene transfer, and gene loss events. Efficient algorithms exist for finding optimal DTL reconciliations when the gene tree is binary. However, gene trees are frequently non-binary. With such non-binary gene trees, the reconciliation problem seeks to find a binary resolution of the gene tree that minimizes the reconciliation cost. Given the prevalence of non-binary gene trees, many efficient algorithms have been developed for this problem in the context of the simpler Duplication-Loss (DL) reconciliation model. Yet, no efficient algorithms exist for DTL reconciliation with non-binary gene trees and the complexity of the problem remains unknown.
In this work, we resolve this open question by showing that the problem is, in fact, NP-hard. Our reduction applies to both the dated and undated formulations of DTL reconciliation. By resolving this long-standing open problem, this work will spur the development of both exact and heuristic algorithms for this important problem.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Guo:2017:SGW, author = "Xuan Guo and Jing Zhang and Zhipeng Cai and Ding-Zhu Du and Yi Pan", title = "Searching Genome-Wide Multi-Locus Associations for Multiple Diseases Based on {Bayesian} Inference", journal = j-TCBB, volume = "14", number = "3", pages = "600--610", month = may, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2527648", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Taking the advantage of high-throughput single nucleotide polymorphism (SNP) genotyping technology, large genome-wide association studies (GWASs) have been considered to hold promise for unraveling complex relationships between genotypes and phenotypes. Current multi-locus-based methods are insufficient to detect interactions with diverse genetic effects on multifarious diseases. Also, statistic tests for high-order epistasis ($ \geq 2 $ SNPs) raise huge computational and analytical challenges because the computation increases exponentially as the growth of the cardinality of SNPs combinations. In this paper, we provide a simple, fast and powerful method, named DAM, using Bayesian inference to detect genome-wide multi-locus epistatic interactions in multiple diseases. Experimental results on simulated data demonstrate that our method is powerful and efficient.
We also apply DAM on two GWAS datasets from WTCCC, i.e., Rheumatoid Arthritis and Type 1 Diabetes, and identify some novel findings. Therefore, we believe that our method is suitable and efficient for the full-scale analysis of multi-disease-related interactions in GWASs.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ji:2017:CIF, author = "Hao Ji and Yaohang Li and Seth H. Weinberg", title = "Calcium Ion Fluctuations Alter Channel Gating in a Stochastic Luminal Calcium Release Site Model", journal = j-TCBB, volume = "14", number = "3", pages = "611--619", month = may, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2498552", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Stochasticity and small system size effects in complex biochemical reaction networks can greatly alter transient and steady-state system properties. A common approach to modeling reaction networks, which accounts for system size, is the chemical master equation that governs the dynamics of the joint probability distribution for molecular copy number. However, calculation of the stationary distribution is often prohibitive, due to the large state-space associated with most biochemical reaction networks. Here, we analyze a network representing a luminal calcium release site model and investigate to what extent small system size effects and calcium fluctuations, driven by ion channel gating, influx and diffusion, alter steady-state ion channel properties including open probability. 
For a physiological ion channel gating model and number of channels, the state-space may be between approximately $ 10^6 $--$ 10^8 $ elements, and a novel modified block power method is used to solve the associated dominant eigenvector problem required to calculate the stationary distribution. We demonstrate that both small local cytosolic domain volume and a small number of ion channels drive calcium fluctuations that result in deviation from the corresponding model that neglects small system size effects.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Galvao:2017:SCP, author = "Gustavo Rodrigues Galvao and Christian Baudet and Zanoni Dias", title = "Sorting Circular Permutations by Super Short Reversals", journal = j-TCBB, volume = "14", number = "3", pages = "620--633", month = may, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2515594", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We consider the problem of sorting a circular permutation by super short reversals (i.e., reversals of length at most 2), a problem that finds application in comparative genomics. Polynomial-time solutions to the unsigned version of this problem are known, but the signed version remained open. In this paper, we present the first polynomial-time solution to the signed version of this problem. Moreover, we perform experiments for inferring phylogenies of two different groups of bacterial species and compare our results with the phylogenies presented in previous works.
Finally, to facilitate phylogenetic studies based on the methods studied in this paper, we present a web tool for rearrangement-based phylogenetic inference using short operations, such as super short reversals.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zheng:2017:MMA, author = "Weihua Zheng and Kenli Li and Keqin Li and Hing Cheung So", title = "A Modified Multiple Alignment {Fast Fourier Transform} with Higher Efficiency", journal = j-TCBB, volume = "14", number = "3", pages = "634--645", month = may, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2530064", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Multiple sequence alignment (MSA) is the most common task in bioinformatics. Multiple alignment fast Fourier transform (MAFFT) is the fastest MSA program among those the accuracy of the resulting alignments can be comparable with the most accurate MSA programs. In this paper, we modify the correlation computation scheme of the MAFFT for further efficiency improvement in three aspects. First, novel complex number based amino acid and nucleotide expressions are utilized in the modified correlation. Second, linear convolution with a limitation is proposed for computing the correlation of amino acid and nucleotide sequences. Third, we devise a fast Fourier transform (FFT) algorithm for computing linear convolution. The FFT algorithm is based on conjugate pair split-radix FFT and does not require the permutation of order, and it is new as only real parts of the final outputs are required.
Simulation results show that the speed of the modified scheme is 107.58 to 365.74 percent faster than that of the original MAFFT for one execution of the function Falign of MAFFT, indicating its faster realization.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ezzat:2017:DTI, author = "Ali Ezzat and Peilin Zhao and Min Wu and Xiao-Li Li and Chee-Keong Kwoh", title = "Drug-Target Interaction Prediction with Graph Regularized Matrix Factorization", journal = j-TCBB, volume = "14", number = "3", pages = "646--656", month = may, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2530062", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Experimental determination of drug-target interactions is expensive and time-consuming. Therefore, there is a continuous demand for more accurate predictions of interactions using computational techniques. Algorithms have been devised to infer novel interactions on a global scale where the input to these algorithms is a drug-target network (i.e., a bipartite graph where edges connect pairs of drugs and targets that are known to interact). However, these algorithms had difficulty predicting interactions involving new drugs or targets for which there are no known interactions (i.e., ``orphan'' nodes in the network). Since data usually lie on or near to low-dimensional non-linear manifolds, we propose two matrix factorization methods that use graph regularization in order to learn such manifolds.
In addition, considering that many of the non-occurring edges in the network are actually unknown or missing cases, we developed a preprocessing step to enhance predictions in the ``new drug'' and ``new target'' cases by adding edges with intermediate interaction likelihood scores. In our cross validation experiments, our methods achieved better results than three other state-of-the-art methods in most cases. Finally, we simulated some ``new drug'' and ``new target'' cases and found that GRMF predicted the left-out interactions reasonably well.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Thanh:2017:ECT, author = "Vo Hong Thanh and Roberto Zunino and Corrado Priami", title = "Efficient Constant-Time Complexity Algorithm for Stochastic Simulation of Large Reaction Networks", journal = j-TCBB, volume = "14", number = "3", pages = "657--667", month = may, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2530066", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Exact stochastic simulation is an indispensable tool for a quantitative study of biochemical reaction networks. The simulation realizes the time evolution of the model by randomly choosing a reaction to fire and update the system state according to a probability that is proportional to the reaction propensity. Two computationally expensive tasks in simulating large biochemical networks are the selection of next reaction firings and the update of reaction propensities due to state changes. We present in this work a new exact algorithm to optimize both of these simulation bottlenecks. 
Our algorithm employs the composition-rejection on the propensity bounds of reactions to select the next reaction firing. The selection of next reaction firings is independent of the number of reactions while the update of propensities is skipped and performed only when necessary. It therefore provides a favorable scaling for the computational complexity in simulating large reaction networks. We benchmark our new algorithm with the state of the art algorithms available in literature to demonstrate its applicability and efficiency.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Arram:2017:LFA, author = "James Arram and Thomas Kaplan and Wayne Luk and Peiyong Jiang", title = "Leveraging {FPGAs} for Accelerating Short Read Alignment", journal = j-TCBB, volume = "14", number = "3", pages = "668--677", month = may, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2535385", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "One of the key challenges facing genomics today is how to efficiently analyze the massive amounts of data produced by next-generation sequencing platforms. With general-purpose computing systems struggling to address this challenge, specialized processors such as the Field-Programmable Gate Array (FPGA) are receiving growing interest. The means by which to leverage this technology for accelerating genomic data analysis is however largely unexplored. In this paper, we present a runtime reconfigurable architecture for accelerating short read alignment using FPGAs. This architecture exploits the reconfigurability of FPGAs to allow the development of fast yet flexible alignment designs.
We apply this architecture to develop an alignment design which supports exact and approximate alignment with up to two mismatches. Our design is based on the FM-index, with optimizations to improve the alignment performance. In particular, the $n$-step FM-index, index oversampling, a seed-and-compare stage, and bi-directional backtracking are included. Our design is implemented and evaluated on a 1U Maxeler MPC-X2000 dataflow node with eight Altera Stratix-V FPGAs. Measurements show that our design is 28 times faster than Bowtie2 running with 16 threads on dual Intel Xeon E5-2640 CPUs, and nine times faster than Soap3-dp running on an NVIDIA Tesla C2070 GPU.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{He:2017:PNA, author = "Feng He and Guanghui Zhu and Yin-Ying Wang and Xing-Ming Zhao and De-Shuang Huang", title = "{PCID}: a Novel Approach for Predicting Disease Comorbidity by Integrating Multi-Scale Data", journal = j-TCBB, volume = "14", number = "3", pages = "678--686", month = may, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2550443", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Disease comorbidity is the presence of one or more diseases along with a primary disorder, which causes additional pain to patients and leads to the failure of standard treatments compared with single diseases. Therefore, the identification of potential comorbidity can help prevent those comorbid diseases when treating a primary disease. Unfortunately, most of current known disease comorbidities are discovered occasionally in clinic, and our knowledge about comorbidity is far from complete. 
Despite the fact that many efforts have been made to predict disease comorbidity, the prediction accuracy of existing computational approaches needs to be improved. By investigating the factors underlying disease comorbidity, e.g., mutated genes and rewired protein-protein interactions PPIs, we here present a novel algorithm to predict disease comorbidity by integrating multi-scale data ranging from genes to phenotypes. Benchmark results on real data show that our approach outperforms existing algorithms, and some of our novel predictions are validated with those reported in literature, indicating the effectiveness and predictive power of our approach. In addition, we identify some pathway and PPI patterns that underlie the co-occurrence between a primary disease and certain disease classes, which can help explain how the comorbidity is initiated from molecular perspectives.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zeng:2017:PVD, author = "Xiangxiang Zeng and Yuanlu Liao and Yuansheng Liu and Quan Zou", title = "Prediction and Validation of Disease Genes Using {HeteSim} Scores", journal = j-TCBB, volume = "14", number = "3", pages = "687--695", month = may, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2520947", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Deciphering the gene disease association is an important goal in biomedical research. In this paper, we use a novel relevance measure, called HeteSim, to prioritize candidate disease genes. Two methods based on heterogeneous networks constructed using protein-protein interaction, gene-phenotype associations, and phenotype-phenotype similarity, are presented. 
In HeteSim\_MultiPath (HSMP), HeteSim scores of different paths are combined with a constant that dampens the contributions of longer paths. In HeteSim\_SVM (HSSVM), HeteSim scores are combined with a machine learning method. The 3-fold experiments show that our non-machine learning method HSMP performs better than the existing non-machine learning methods, our machine learning method HSSVM obtains similar accuracy with the best existing machine learning method CATAPULT. From the analysis of the top 10 predicted genes for different diseases, we found that HSSVM avoid the disadvantage of the existing machine learning based methods, which always predict similar genes for different diseases. The data sets and Matlab code for the two methods are freely available for download at \url{http://lab.malab.cn/data/HeteSim/index.jsp}.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Shabash:2017:RVR, author = "Boris Shabash and Kay C. Wiese", title = "{RNA} Visualization: Relevance and the Current State-of-the-Art Focusing on Pseudoknots", journal = j-TCBB, volume = "14", number = "3", pages = "696--712", month = may, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2522421", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "RNA visualization is crucial in order to understand the relationship that exists between RNA structure and its function, as well as the development of better RNA structure prediction algorithms. However, in the context of RNA visualization, one key structure remains difficult to visualize: Pseudoknots. Pseudoknots occur in RNA folding when two secondary structural components form base-pairs between them.
The three-dimensional nature of these components makes them challenging to visualize in two-dimensional media, such as print media or screens. In this review, we focus on the advancements that have been made in the field of RNA visualization in two-dimensional media in the past two decades. The review aims at presenting all relevant aspects of pseudoknot visualization. We start with an overview of several pseudoknotted structures and their relevance in RNA function. Next, we discuss the theoretical basis for RNA structural topology classification and present RNA classification systems for both pseudoknotted and non-pseudoknotted RNAs. Each description of RNA classification system is followed by a discussion of the software tools and algorithms developed to date to visualize RNA, comparing the different tools' strengths and shortcomings.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Peng:2017:HNB, author = "Chen Peng and Ao Li", title = "A Heterogeneous Network Based Method for Identifying {GBM}-Related Genes by Integrating Multi-Dimensional Data", journal = j-TCBB, volume = "14", number = "3", pages = "713--720", month = may, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2555314", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The emergence of multi-dimensional data offers opportunities for more comprehensive analysis of the molecular characteristics of human diseases and therefore improving diagnosis, treatment, and prevention. In this study, we proposed a heterogeneous network based method by integrating multi-dimensional data HNMD to identify GBM-related genes. 
The novelty of the method lies in that the multi-dimensional data of GBM from TCGA dataset that provide comprehensive information of genes, are combined with protein-protein interactions to construct a weighted heterogeneous network, which reflects both the general and disease-specific relationships between genes. In addition, a propagation algorithm with resistance is introduced to precisely score and rank GBM-related genes. The results of comprehensive performance evaluation show that the proposed method significantly outperforms the network based methods with single-dimensional data and other existing approaches. Subsequent analysis of the top ranked genes suggests they may be functionally implicated in GBM, which further corroborates the superiority of the proposed method. The source code and the results of HNMD can be downloaded from the following URL: \url{http://bioinformatics.ustc.edu.cn/hnmd/}.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Vasylchenkova:2017:CMA, author = "Anastasiia Vasylchenkova and Miha Mraz and Nikolaj Zimic and Miha Moskon", title = "Classical Mechanics Approach Applied to Analysis of Genetic Oscillators", journal = j-TCBB, volume = "14", number = "3", pages = "721--727", month = may, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2550456", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Biological oscillators present a fundamental part of several regulatory mechanisms that control the response of various biological systems. Several analytical approaches for their analysis have been reported recently.
They are, however, limited to only specific oscillator topologies and/or to giving only qualitative answers, i.e., is the dynamics of an oscillator given the parameter space oscillatory or not. Here, we present a general analytical approach that can be applied to the analysis of biological oscillators. It relies on the projection of biological systems to classical mechanics systems. The approach is able to provide us with relatively accurate results in the meaning of type of behavior system reflects i.e., oscillatory or not and periods of potential oscillations without the necessity to conduct expensive numerical simulations. We demonstrate and verify the proposed approach on three different implementations of amplified negative feedback oscillator.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{daSilva:2017:GDH, author = "Poly H. da Silva and Raphael Machado and Simone Dantas and Marilia D. V. Braga", title = "Genomic Distance with High Indel Costs", journal = j-TCBB, volume = "14", number = "3", pages = "728--732", month = may, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2555301", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We determine complexity of computing the DCJ-indel distance, when DCJ and indel operations have distinct constant costs, by showing an exact formula that can be computed in linear time for any choice of constant costs for DCJ and indel operations. 
We additionally consider the problem of triangular inequality disruption and propose an algorithmically efficient correction on each member of the family of DCJ-indel.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wu:2017:PCD, author = "Min Wu and Le Ou-Yang and Xiao-Li Li", title = "Protein Complex Detection via Effective Integration of Base Clustering Solutions and Co-Complex Affinity Scores", journal = j-TCBB, volume = "14", number = "3", pages = "733--739", month = may, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2552176", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "With the increasing availability of protein interaction data, various computational methods have been developed to predict protein complexes. However, different computational methods may have their own advantages and limitations. Ensemble clustering has thus been studied to minimize the potential bias and risk of individual methods and generate prediction results with better coverage and accuracy. In this paper, we extend the traditional ensemble clustering by taking into account the co-complex affinity scores and present an Ensemble Hierarchical Clustering framework (EnsemHC) to detect protein complexes. First, we construct co-cluster matrices by integrating the clustering results with the co-complex evidences. Second, we sum up the constructed co-cluster matrices to derive a final ensemble matrix via a novel iterative weighting scheme. Finally, we apply the hierarchical clustering to generate protein complexes from the final ensemble matrix.
Experimental results demonstrate that our EnsemHC performs better than its base clustering methods and various existing integrative methods. In addition, we also observed that integrating the clusters and co-complex affinity scores from different data sources will improve the prediction performance, e.g., integrating the clusters from TAP data and co-complex affinities from binary PPI data achieved the best performance in our experiments.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Li:2017:RRB, author = "Fangfei Li and Yang Tang", title = "Robust Reachability of {Boolean} Control Networks", journal = j-TCBB, volume = "14", number = "3", pages = "740--745", month = may, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2555302", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Boolean networks serve a powerful tool in analysis of genetic regulatory networks since it emphasizes the fundamental principles and establishes a nature framework for capturing the dynamics of regulation of cellular states. In this paper, the robust reachability of Boolean control networks is investigated by means of semi-tensor product. Necessary and sufficient conditions for the robust reachability of Boolean control networks are provided, in which control inputs relying on disturbances or not are considered, respectively. Besides, the corresponding control algorithms are developed for these two cases. 
A reduced model of the lac operon in the Escherichia coli is presented to show the effectiveness of the presented results.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Yan:2017:TNW, author = "Zhangming Yan and Ke Liu and Shunian Xiang and Zhirong Sun", title = "{txCoords}: a Novel {Web} Application for Transcriptomic Peak Re-Mapping", journal = j-TCBB, volume = "14", number = "3", pages = "746--748", month = may, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2568178", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Since the development of new technologies such as RIP-Seq and m6A-seq, peak calling has become an important step in transcriptomic sequencing data analysis. However, many of the reported genomic coordinates of transcriptomic peaks are incorrect owing to negligence of the introns. There is currently a lack of a convenient tool to address this problem. Here, we present txCoords, a novel and easy-to-use web application for transcriptomic peak re-mapping. txCoords can be used to correct the incorrectly reported transcriptomic peaks and retrieve the true sequences. It also supports visualization of the re-mapped peaks in a schematic figure or from the UCSC Genome Browser. 
Our web server is freely available at \url{http://www.bioinfo.tsinghua.edu.cn/txCoords}.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Jha:2017:GLB, author = "Manjari Jha and Raunaq Malhotra and Raj Acharya", title = "A Generalized Lattice Based Probabilistic Approach for Metagenomic Clustering", journal = j-TCBB, volume = "14", number = "4", pages = "749--761", month = jul, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2563422", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Metagenomics involves the analysis of genomes of microorganisms sampled directly from their environment. Next Generation Sequencing allows a high-throughput sampling of small segments from genomes in the metagenome to generate reads. To study the properties and relationships of the microorganisms present, clustering can be performed based on the inherent composition of the sampled reads for unknown species. We propose a two-dimensional lattice based probabilistic model for clustering metagenomic datasets. The occurrence of a species in the metagenome is estimated using a lattice of probabilistic distributions over small sized genomic sequences. The two dimensions denote distributions for different sizes and groups of words, respectively. The lattice structure allows for additional support for a node from its neighbors when the probabilistic support for the species using the parameters of the current node is deemed insufficient. We also show convergence for our algorithm. We test our algorithm on simulated metagenomic data containing bacterial species and observe more than 85\% precision.
We also evaluate our algorithm on an in vitro-simulated bacterial metagenome and on human patient data, and show a better clustering than other algorithms even for short reads and varied abundance. The software and datasets can be downloaded from \url{https://github.com/lattclus/lattice-metage}.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Bandyopadhyay:2017:NFV, author = "Sanghamitra Bandyopadhyay and Koushik Mallick", title = "A New Feature Vector Based on Gene Ontology Terms for Protein--Protein Interaction Prediction", journal = j-TCBB, volume = "14", number = "4", pages = "762--770", month = jul, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2555304", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Protein-protein interaction PPI plays a key role in understanding cellular mechanisms in different organisms. Many supervised classifiers like Random Forest RF and Support Vector Machine SVM have been used for intra or inter-species interaction prediction. For improving the prediction performance, in this paper we propose a novel set of features to represent a protein pair using their annotated Gene Ontology GO terms, including their ancestors. In our approach, a protein pair is treated as a document bag of words, where the terms annotating the two proteins represent the words. Feature value of each word is calculated using information content of the corresponding term multiplied by a coefficient, which represents the weight of that term inside a document i.e., a protein pair. We have tested the performance of the classifier using the proposed feature on different well known data sets of different species like S. cerevisiae, H. Sapiens, E. 
Coli, and D. melanogaster. We compare it with the other GO based feature representation technique, and demonstrate its competitive performance.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Park:2017:NAP, author = "Heewon Park and Yuichi Shiraishi and Seiya Imoto and Satoru Miyano", title = "A Novel Adaptive Penalized Logistic Regression for Uncovering Biomarker Associated with Anti-Cancer Drug Sensitivity", journal = j-TCBB, volume = "14", number = "4", pages = "771--782", month = jul, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2561937", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We propose a novel adaptive penalized logistic regression modeling strategy based on Wilcoxon rank sum test (WRST) to effectively uncover driver genes in classification. In order to incorporate significance of gene in classification, we first measure significance of each gene by gene ranking method based on WRST, and then the adaptive $L_1$-type penalty is discriminately imposed on each gene depending on the measured importance degree of gene. The incorporating significance of genes into adaptive logistic regression enables us to impose a large amount of penalty on low ranking genes, and thus noise genes are easily deleted from the model and we can effectively identify driver genes. Monte Carlo experiments and real world example are conducted to investigate effectiveness of the proposed approach. In Sanger data analysis, we introduce a strategy to identify expression modules indicating gene regulatory mechanisms via the principal component analysis (PCA), and perform logistic regression modeling based on not a single gene but gene expression modules.
We can see through Monte Carlo experiments and real world example that the proposed adaptive penalized logistic regression outperforms feature selection and classification compared with existing $L_1$-type regularization. The discriminately imposed penalty based on WRST effectively performs crucial gene selection, and thus our method can improve classification accuracy without interruption of noise genes. Furthermore, it can be seen through Sanger data analysis that the method for gene expression modules based on principal components and their loading scores provides interpretable results in biological viewpoints.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wang:2017:SAL, author = "Chao Wang and Dong Dai and Xi Li and Aili Wang and Xuehai Zhou", title = "{SuperMIC}: Analyzing Large Biological Datasets in Bioinformatics with Maximal Information Coefficient", journal = j-TCBB, volume = "14", number = "4", pages = "783--795", month = jul, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2550430", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The maximal information coefficient (MIC) has been proposed to discover relationships and associations between pairs of variables. It poses significant challenges for bioinformatics scientists to accelerate the MIC calculation, especially in genome sequencing and biological annotations. In this paper, we explore a parallel approach which uses MapReduce framework to improve the computing efficiency and throughput of the MIC computation. The acceleration system includes biological data storage on HDFS, preprocessing algorithms, distributed memory cache mechanism, and the partition of MapReduce jobs.
Based on the acceleration approach, we extend the traditional two-variable algorithm to multiple variables algorithm. The experimental results show that our parallel solution provides a linear speedup comparing with original algorithm without affecting the correctness and sensitivity.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Nawab:2017:IBA, author = "Rao Muhammad Adeel Nawab and Mark Stevenson and Paul Clough", title = "An {IR}-Based Approach Utilizing Query Expansion for Plagiarism Detection in {MEDLINE}", journal = j-TCBB, volume = "14", number = "4", pages = "796--804", month = jul, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2542803", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The identification of duplicated and plagiarized passages of text has become an increasingly active area of research. In this paper, we investigate methods for plagiarism detection that aim to identify potential sources of plagiarism from MEDLINE, particularly when the original text has been modified through the replacement of words or phrases. A scalable approach based on Information Retrieval is used to perform candidate document selection---the identification of a subset of potential source documents given a suspicious text---from MEDLINE. Query expansion is performed using the UMLS Metathesaurus to deal with situations in which original documents are obfuscated. Various approaches to Word Sense Disambiguation are investigated to deal with cases where there are multiple Concept Unique Identifiers (CUIs) for a given term.
Results using the proposed IR-based approach outperform a state-of-the-art baseline based on Kullback--Leibler Distance.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Murugesan:2017:BMI, author = "Sugeerth Murugesan and Kristopher Bouchard and Jesse A. Brown and Bernd Hamann and William W. Seeley and Andrew Trujillo and Gunther H. Weber", title = "Brain Modulyzer: Interactive Visual Analysis of Functional Brain Connectivity", journal = j-TCBB, volume = "14", number = "4", pages = "805--818", month = jul, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2564970", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We present Brain Modulyzer, an interactive visual exploration tool for functional magnetic resonance imaging (fMRI) brain scans, aimed at analyzing the correlation between different brain regions when resting or when performing mental tasks. Brain Modulyzer combines multiple coordinated views---such as heat maps, node link diagrams, and anatomical views---using brushing and linking to provide an anatomical context for brain connectivity data. Integrating methods from graph theory and analysis, e.g., community detection and derived graph measures, makes it possible to explore the modular and hierarchical organization of functional brain networks. Providing immediate feedback by displaying analysis results instantaneously while changing parameters gives neuroscientists a powerful means to comprehend complex brain structure more effectively and efficiently and supports forming hypotheses that can then be validated via statistical analysis.
To demonstrate the utility of our tool, we present two case studies---exploring progressive supranuclear palsy, as well as memory encoding and retrieval.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Barragan:2017:COA, author = "Sandra Barragan and Cristina Rueda and Miguel A. Fernandez", title = "Circular Order Aggregation and Its Application to Cell-Cycle Genes Expressions", journal = j-TCBB, volume = "14", number = "4", pages = "819--829", month = jul, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2565469", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The aim of circular order aggregation is to find a circular order on a set of $n$ items using angular values from $p$ heterogeneous data sets. This problem is new in the literature and has been motivated by the biological question of finding the order among the peak expression of a group of cell cycle genes. In this paper, two very different approaches to solve the problem that use pairwise and triplewise information are proposed.
Both approaches are analyzed and compared using theoretical developments and numerical studies, and applied to the cell cycle data that motivated the problem.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhang:2017:CFG, author = "Jing Zhang and Hao Wang and Wu-chun Feng", title = "{cuBLASTP}: Fine-Grained Parallelization of Protein Sequence Search on {CPU + GPU}", journal = j-TCBB, volume = "14", number = "4", pages = "830--843", month = jul, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2489662", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "BLAST, short for Basic Local Alignment Search Tool, is a ubiquitous tool used in the life sciences for pairwise sequence search. However, with the advent of next-generation sequencing (NGS), whether at the outset or downstream from NGS, the exponential growth of sequence databases is outstripping our ability to analyze the data. While recent studies have utilized the graphics processing unit (GPU) to speedup the BLAST algorithm for searching protein sequences (i.e., BLASTP), these studies use coarse-grained parallelism, where one sequence alignment is mapped to only one thread. Such an approach does not efficiently utilize the capabilities of a GPU, particularly due to the irregularity of BLASTP in both execution paths and memory-access patterns. To address the above shortcomings, we present a fine-grained approach to parallelize BLASTP, where each individual phase of sequence search is mapped to many threads on a GPU. This approach, which we refer to as cuBLASTP, reorders data-access patterns and reduces divergent branches of the most time-consuming phases (i.e., hit detection and ungapped extension).
In addition, cuBLASTP optimizes the remaining phases (i.e., gapped extension and alignment with trace back) on a multicore CPU and overlaps their execution with the phases running on the GPU.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Xie:2017:DFO, author = "Lu Xie and Gregory R. Smith and Russell Schwartz", title = "Derivative-Free Optimization of Rate Parameters of Capsid Assembly Models from Bulk in Vitro Data", journal = j-TCBB, volume = "14", number = "4", pages = "844--855", month = jul, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2563421", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The assembly of virus capsids proceeds by a complicated cascade of association and dissociation steps, the great majority of which cannot be directly experimentally observed. This has made capsid assembly a rich field for computational models, but there are substantial obstacles to model inference for such systems. Here, we describe progress on fitting kinetic rate constants defining capsid assembly models to experimental data, a difficult data-fitting problem because of the high computational cost of simulating assembly trajectories, the stochastic noise inherent to the models, and the limited and noisy data available for fitting. We evaluate the merits of data-fitting methods based on derivative-free optimization (DFO) relative to gradient-based methods used in prior work.
We further explore the advantages of alternative data sources through simulation of a model of time-resolved mass spectrometry data, a technology for monitoring bulk capsid assembly that can be expected to provide much richer data than previously used static light scattering approaches. The results show that advances in both the data and the algorithms can improve model inference. More informative data sources lead to high-quality fits for all methods, but DFO methods show substantial advantages on less informative data sources that better represent current experimental practice.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhang:2017:DCN, author = "Yue Zhang and Yiu-ming Cheung and Bo Xu and Weifeng Su", title = "Detection Copy Number Variants from {NGS} with Sparse and Smooth Constraints", journal = j-TCBB, volume = "14", number = "4", pages = "856--867", month = jul, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2561933", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "It is known that copy number variations (CNVs) are associated with complex diseases and particular tumor types, thus reliable identification of CNVs is of great potential value. Recent advances in next generation sequencing (NGS) data analysis have helped manifest the richness of CNV information. However, the performances of these methods are not consistent. Reliably finding CNVs in NGS data in an efficient way remains a challenging topic, worthy of further investigation. Accordingly, we tackle the problem by formulating CNVs identification into a quadratic optimization problem involving two constraints.
By imposing the constraints of sparsity and smoothness, the reconstructed read depth signal from NGS is anticipated to fit the CNVs patterns more accurately. An efficient numerical solution tailored from alternating direction minimization (ADM) framework is elaborated. We demonstrate the advantages of the proposed method, namely ADM-CNV, by comparing it with six popular CNV detection methods using synthetic, simulated, and empirical sequencing data. It is shown that the proposed approach can successfully reconstruct CNV patterns from raw data, and achieve superior or comparable performance in detection of the CNVs compared to the existing counterparts.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Fan:2017:FTS, author = "Xiaofei Fan and Xian Zhang and Ligang Wu and Michael Shi", title = "Finite-Time Stability Analysis of Reaction-Diffusion Genetic Regulatory Networks with Time-Varying Delays", journal = j-TCBB, volume = "14", number = "4", pages = "868--879", month = jul, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2552519", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This paper is concerned with the finite-time stability problem of the delayed genetic regulatory networks (GRNs) with reaction-diffusion terms under Dirichlet boundary conditions. By constructing a Lyapunov--Krasovskii functional including quad-slope integrations, we establish delay-dependent finite-time stability criteria by employing the Wirtinger-type integral inequality, Gronwall inequality, convex technique, and reciprocally convex technique. In addition, the obtained criteria are also reaction-diffusion-dependent.
Finally, a numerical example is provided to illustrate the effectiveness of the theoretical results.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Pouyan:2017:ICP, author = "Maziyar Baran Pouyan and Mehrdad Nourani", title = "Identifying Cell Populations in Flow Cytometry Data Using Phenotypic Signatures", journal = j-TCBB, volume = "14", number = "4", pages = "880--891", month = jul, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2550428", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Single-cell flow cytometry is a technology that measures the expression of several cellular markers simultaneously for a large number of cells. Identification of homogeneous cell populations, currently done by manual biaxial gating, is highly subjective and time consuming. To overcome the shortcomings of manual gating, automatic algorithms have been proposed. However, the performance of these methods highly depends on the shape of populations and the dimension of the data. In this paper, we have developed a time-efficient method that accurately identifies cellular populations. This is done based on a novel technique that estimates the initial number of clusters in high dimension and identifies the final clusters by merging clusters using their phenotypic signatures in low dimension. The proposed method is called SigClust. We have applied SigClust to four public datasets and compared it with five well known methods in the field.
The results are promising and indicate higher performance and accuracy compared to similar approaches reported in literature.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Keller:2017:ISL, author = "Anne Florence Keller and Nicolas Ambert and Arnaud Legendre and Mathieu Bedez and Jean-Marie Bouteiller and Serge Bischoff and Michel Baudry and Saliha Moussaoui", title = "Impact of Synaptic Localization and Subunit Composition of Ionotropic Glutamate Receptors on Synaptic Function: Modeling and Simulation Studies", journal = j-TCBB, volume = "14", number = "4", pages = "892--904", month = jul, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2561932", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Ionotropic NMDA and AMPA glutamate receptors (iGluRs) play important roles in synaptic function under physiological and pathological conditions. iGluRs sub-synaptic localization and subunit composition are dynamically regulated by activity-dependent insertion and internalization. However, understanding the impact on synaptic transmission of changes in composition and localization of iGluRs is difficult to address experimentally. To address this question, we developed a detailed computational model of glutamatergic synapses, including spine and dendritic compartments, elementary models of subtypes of NMDA and AMPA receptors, glial glutamate transporters, intracellular calcium, and a calcium-dependent signaling cascade underlying the development of long-term potentiation (LTP).
These synapses were distributed on a neuron model and numerical simulations were performed to assess the impact of changes in composition and localization (synaptic versus extrasynaptic) of iGluRs on synaptic transmission and plasticity following various patterns of presynaptic stimulation. In addition, the effects of various pharmacological compounds targeting NMDARs or AMPARs were determined. Our results showed that changes in NMDAR localization have a greater impact on synaptic plasticity than changes in AMPARs. Moreover, the results suggest that modulators of AMPA and NMDA receptors have differential effects on restoring synaptic plasticity under different experimental situations mimicking various human diseases.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Liu:2017:IMD, author = "Yuansheng Liu and Xiangxiang Zeng and Zengyou He and Quan Zou", title = "Inferring {MicroRNA}-Disease Associations by Random Walk on a Heterogeneous Network with Multiple Data Sources", journal = j-TCBB, volume = "14", number = "4", pages = "905--915", month = jul, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2550432", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Since the discovery of the regulatory function of microRNA (miRNA), increased attention has focused on identifying the relationship between miRNA and disease. It has been suggested that computational method is an efficient way to identify potential disease-related miRNAs for further confirmation using biological experiments. In this paper, we first highlighted three limitations commonly associated with previous computational methods.
To resolve these limitations, we established disease similarity subnetwork and miRNA similarity subnetwork by integrating multiple data sources, where the disease similarity is composed of disease semantic similarity and disease functional similarity, and the miRNA similarity is calculated using the miRNA-target gene and miRNA-lncRNA (long non-coding RNA) associations. Then, a heterogeneous network was constructed by connecting the disease similarity subnetwork and the miRNA similarity subnetwork using the known miRNA-disease associations. We extended random walk with restart to predict miRNA-disease associations in the heterogeneous network. The leave-one-out cross-validation achieved an average area under the curve (AUC) of $ 0.8049 $ across $ 341 $ diseases and $ 476 $ miRNAs. For five-fold cross-validation, our method achieved an AUC from $ 0.7970 $ to $ 0.9249 $ for $ 15 $ human diseases. Case studies further demonstrated the feasibility of our method to discover potential miRNA-disease associations.
An online service for prediction is freely available at http://ifmda.aliapp.com.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Li:2017:IIS, author = "Min Li and Zhongxiang Liao and Yiming He and Jianxin Wang and Junwei Luo and Yi Pan", title = "{ISEA}: Iterative Seed-Extension Algorithm for {De} {Novo} Assembly Using Paired-End Information and Insert Size Distribution", journal = j-TCBB, volume = "14", number = "4", pages = "916--925", month = jul, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2550433", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The purpose of de novo assembly is to report more contiguous, complete, and less error prone contigs. Thanks to the advent of the next generation sequencing (NGS) technologies, the cost of producing high depth reads is reduced greatly. However, due to the disadvantages of NGS, de novo assembly has to face the difficulties brought by repeat regions, error rate, and low sequencing coverage in some regions. Although many de novo algorithms have been proposed to solve these problems, the de novo assembly still remains a challenge. In this article, we developed an iterative seed-extension algorithm for de novo assembly, called ISEA. To avoid the negative impact induced by error rate, ISEA utilizes reads overlap and paired-end information to correct error reads before assemblying. During extending seeds in a De Bruijn graph, ISEA uses an elaborately designed score function based on paired-end information and the distribution of insert size to solve the repeat region problem. By employing the distribution of insert size, the score function can also reduce the influence of error reads.
In scaffolding, ISEA adopts a relaxed strategy to join contigs that were terminated for low coverage during the extension. The performance of ISEA was compared with six previous popular assemblers on four real datasets. The experimental results demonstrate that ISEA can effectively obtain longer and more accurate scaffolds.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ma:2017:OIS, author = "Tianle Ma and Aidong Zhang", title = "Omics Informatics: From Scattered Individual Software Tools to Integrated Workflow Management Systems", journal = j-TCBB, volume = "14", number = "4", pages = "926--946", month = jul, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2535251", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Omic data analyses pose great informatics challenges. As an emerging subfield of bioinformatics, omics informatics focuses on analyzing multi-omic data efficiently and effectively, and is gaining momentum. There are two underlying trends in the expansion of omics informatics landscape: the explosion of scattered individual omics informatics tools with each of which focuses on a specific task in both single- and multi-omic settings, and the fast-evolving integrated software platforms such as workflow management systems that can assemble multiple tools into pipelines and streamline integrative analysis for complicated tasks. In this survey, we give a holistic view of omics informatics, from scattered individual informatics tools to integrated workflow management systems.
We not only outline the landscape and challenges of omics informatics, but also sample a number of widely used and cutting-edge algorithms in omics data analysis to give readers a fine-grained view. We survey various workflow management systems (WMSs), classify them into three levels of WMSs from simple software toolkits to integrated multi-omic analytical platforms, and point out the emerging needs for developing intelligent workflow management systems. We also discuss the challenges, strategies and some existing work in systematic evaluation of omics informatics tools. We conclude by providing future perspectives of emerging fields and new frontiers in omics informatics.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhang:2017:PCI, author = "Jianhua Zhang and Zhong Yin and Rubin Wang", title = "Pattern Classification of Instantaneous Cognitive Task-load Through {GMM} Clustering, {Laplacian} Eigenmap, and Ensemble {SVMs}", journal = j-TCBB, volume = "14", number = "4", pages = "947--965", month = jul, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2561927", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The identification of the temporal variations in human operator cognitive task-load (CTL) is crucial for preventing possible accidents in human-machine collaborative systems. Recent literature has shown that the change of discrete CTL level during human-machine system operations can be objectively recognized using neurophysiological data and supervised learning technique.
The objective of this work is to design subject-specific multi-class CTL classifier to reveal the complex unknown relationship between the operator's task performance and neurophysiological features by combining target class labeling, physiological feature reduction and selection, and ensemble classification techniques. The psychophysiological data acquisition experiments were performed under multiple human-machine process control tasks. Four or five target classes of CTL were determined by using a Gaussian mixture model and three human performance variables. By using Laplacian eigenmap, a few salient EEG features were extracted, and heart rates were used as the input features of the CTL classifier. Then, multiple support vector machines were aggregated via majority voting to create an ensemble classifier for recognizing the CTL classes. Finally, the obtained CTL classification results were compared with those of several existing methods. The results showed that the proposed methods are capable of deriving a reasonable number of target classes and low-dimensional optimal EEG features for individual human operator subjects.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Yu:2017:PND, author = "Liang Yu and Ruidan Su and Bingbo Wang and Long Zhang and Yapeng Zou and Jing Zhang and Lin Gao", title = "Prediction of Novel Drugs for Hepatocellular Carcinoma Based on Multi-Source Random Walk", journal = j-TCBB, volume = "14", number = "4", pages = "966--977", month = jul, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2550453", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Computational approaches for predicting drug-disease associations by
integrating gene expression and biological network provide great insights to the complex relationships among drugs, targets, disease genes, and diseases at a system level. Hepatocellular carcinoma (HCC) is one of the most common malignant tumors with a high rate of morbidity and mortality. We provide an integrative framework to predict novel drugs for HCC based on multi-source random walk (PD-MRW). Firstly, based on gene expression and protein interaction network, we construct a gene-gene weighted interaction network (GWIN). Then, based on multi-source random walk in GWIN, we build a drug-drug similarity network. Finally, based on the known drugs for HCC, we score all drugs in the drug-drug similarity network. The robustness of our predictions, their overlap with those reported in Comparative Toxicogenomics Database (CTD) and literatures, and their enriched KEGG pathway demonstrate our approach can effectively identify new drug indications. Specifically, regorafenib (Rank = 9 in top-20 list) is proven to be effective in Phase I and II clinical trials of HCC, and the Phase III trial is ongoing. And, it has 11 overlapping pathways with HCC with lower p-values.
Focusing on a particular disease, we believe our approach is more accurate and possesses better scalability.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Stalidzans:2017:SMS, author = "Egils Stalidzans and Ivars Mozga and Jurijs Sulins and Peteris Zikmanis", title = "Search for a Minimal Set of Parameters by Assessing the Total Optimization Potential for a Dynamic Model of a Biochemical Network", journal = j-TCBB, volume = "14", number = "4", pages = "978--985", month = jul, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2550451", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Selecting an efficient small set of adjustable parameters to improve metabolic features of an organism is important for a reduction of implementation costs and risks of unpredicted side effects. In practice, to avoid the analysis of a huge combinatorial space for the possible sets of adjustable parameters, experience-, and intuition-based subsets of parameters are often chosen, possibly leaving some interesting counter-intuitive combinations of parameters unrevealed. The combinatorial scan of possible adjustable parameter combinations at the model optimization level is possible; however, the number of analyzed combinations is still limited. The total optimization potential (TOP) approach is proposed to assess the full potential for increasing the value of the objective function by optimizing all possible adjustable parameters.
This seemingly unpractical combination of adjustable parameters allows assessing the maximum attainable value of the objective function and stopping the combinatorial space scanning when the desired fraction of TOP is reached and any further increase in the number of adjustable parameters cannot bring any reasonable improvement. The relation between the number of adjustable parameters and the reachable fraction of TOP is a valuable guideline in choosing a rational solution for industrial implementation. The TOP approach is demonstrated on the basis of two case studies.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Dinc:2017:STS, author = "Imren Dinc and Semih Dinc and Madhav Sigdel and Madhu S. Sigdel and Marc L. Pusey and Ramazan S. Aygun", title = "Super-Thresholding: Supervised Thresholding of Protein Crystal Images", journal = j-TCBB, volume = "14", number = "4", pages = "986--998", month = jul, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2542811", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Oct 3 16:58:45 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In general, a single thresholding technique is developed or enhanced to separate foreground objects from background for a domain of images. This idea may not generate satisfactory results for all images in a dataset, since different images may require different types of thresholding methods for proper binarization or segmentation. To overcome this limitation, in this study, we propose a novel approach called ``super-thresholding'' that utilizes a supervised classifier to decide an appropriate thresholding method for a specific image.
This method provides a generic framework that allows selection of the best thresholding method among different thresholding techniques that are beneficial for the problem domain. A classifier model is built using features extracted priori from the original image only or posteriori by analyzing the outputs of thresholding methods and the original image. This model is applied to identify the thresholding method for new images of the domain. We performed our method on protein crystallization images, and then we compared our results with six thresholding techniques. Numerical results are provided using four different correctness measurements. Super-thresholding outperforms the best single thresholding method around 10 percent, and it gives the best performance for protein crystallization dataset in our experiments.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Catalyurek:2017:GEI, author = "Umit V. Catalyurek", title = "{Guest Editor}'s Introduction: Selected Papers from {ACM-BCB 2014}", journal = j-TCBB, volume = "14", number = "5", pages = "1000--1001", month = sep, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2722158", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The papers in this special issue were presented at the 5th ACM Conference on Bioinformatics, Computational Biology, and Health Informatics, held in Newport Beach, CA in September 2014. The papers address the use of computational modeling in the biological and health research fields.
With the new high throughput devices, such as sequencers and imaging devices, and ubiquitous sensor technologies, the landscape of how we do biomedical research; how the knowledge is curated; and results are delivered to stake holders are constantly changing.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Gorecki:2017:UDD, author = "Pawel Gorecki and Jaroslaw Paszek and Oliver Eulenstein", title = "Unconstrained Diameters for Deep Coalescence", journal = j-TCBB, volume = "14", number = "5", pages = "1002--1012", month = sep, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2520937", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The minimizing-deep-coalescence (MDC) approach infers a median species tree for a given set of gene trees under the deep coalescence cost. This cost accounts for the minimum number of deep coalescences needed to reconcile a gene tree with a species tree where the leaf-genes are mapped to the leaf-species through a function called leaf labeling. In order to better understand the MDC approach we investigate here the diameter of a gene tree, which is an important property of the deep coalescence cost. This diameter is the maximal deep coalescence costs for a given gene tree under all leaf labelings for each possible species tree topology. While we prove that this diameter is generally infinite, this result relies on the diameter's unrealistic assumption that species trees can be of infinite size. Providing a more practical definition, we introduce a natural extension of the gene tree diameter that constrains the species tree size by a given constant.
For this new diameter, we describe an exact formula, present a complete classification of the trees yielding this diameter, derive formulas for its mean and variance, and demonstrate its ability using comparative studies.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wang:2017:SLF, author = "Zhiyong Wang and Benika Hall and Jinbo Xu and Xinghua Shi", title = "A Sparse Learning Framework for Joint Effect Analysis of Copy Number Variants", journal = j-TCBB, volume = "14", number = "5", pages = "1013--1027", month = sep, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2462332", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Copy number variants CNVs, including large deletions and duplications, represent an unbalanced change of DNA segments. Abundant in human genomes, CNVs contribute to a large proportion of human genetic diversity, with impact on many human phenotypes. Although recent advances in genetic studies have shed light on the impact of individual CNVs on different traits, the analysis of joint effect of multiple interactive CNVs lags behind from many perspectives. A primary reason is that the large number of CNV combinations and interactions in the human genome make it computationally challenging to perform such joint analysis. To address this challenge, we developed a novel sparse learning framework that combines sparse learning with biological networks to identify interacting CNVs with joint effect on particular traits. We showed that our approach performs well in identifying CNVs with joint phenotypic effect using simulated data. 
Applied to a real human genomic dataset from the 1,000 Genomes Project, our approach identified multiple CNVs that collectively contribute to population differentiation. We found a set of multiple CNVs that have joint effect in different populations, and affect gene expression differently in distinct populations. These results provided a collection of CNVs that likely have downstream biomedical implications in individuals from diverse population backgrounds.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{DeBlasio:2017:LPA, author = "Dan DeBlasio and John Kececioglu", title = "Learning Parameter-Advising Sets for Multiple Sequence Alignment", journal = j-TCBB, volume = "14", number = "5", pages = "1028--1041", month = sep, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2430323", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "While the multiple sequence alignment output by an aligner strongly depends on the parameter values used for the alignment scoring function such as the choice of gap penalties and substitution scores, most users rely on the single default parameter setting provided by the aligner. A different parameter setting, however, might yield a much higher-quality alignment for the specific set of input sequences. The problem of picking a good choice of parameter values for specific input sequences is called parameter advising. A parameter advisor has two ingredients: (i) a set of parameter choices to select from, and (ii) an estimator that provides an estimate of the accuracy of the alignment computed by the aligner using a parameter choice. The parameter advisor picks the parameter choice from the set whose resulting alignment has highest estimated accuracy.
In this paper, we consider for the first time the problem of learning the optimal set of parameter choices for a parameter advisor that uses a given accuracy estimator. The optimal set is one that maximizes the expected true accuracy of the resulting parameter advisor, averaged over a collection of training data. While we prove that learning an optimal set for an advisor is NP-complete, we show there is a natural approximation algorithm for this problem, and prove a tight bound on its approximation ratio. Experiments with an implementation of this approximation algorithm on biological benchmarks, using various accuracy estimators from the literature, show it finds sets for advisors that are surprisingly close to optimal. Furthermore, the resulting parameter advisors are significantly more accurate in practice than simply aligning with a single default parameter choice.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ritz:2017:PAS, author = "Anna Ritz and Brendan Avent and T. M. Murali", title = "Pathway Analysis with Signaling Hypergraphs", journal = j-TCBB, volume = "14", number = "5", pages = "1042--1055", month = sep, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2459681", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Signaling pathways play an important role in the cell's response to its environment. Signaling pathways are often represented as directed graphs, which are not adequate for modeling reactions such as complex assembly and dissociation, combinatorial regulation, and protein activation/inactivation. More accurate representations such as directed hypergraphs remain underutilized. 
In this paper, we present an extension of a directed hypergraph that we call a signaling hypergraph. We formulate a problem that asks what proteins and interactions must be involved in order to stimulate a specific response downstream of a signaling pathway. We relate this problem to computing the shortest acyclic $B$-hyperpath in a signaling hypergraph-an NP-hard problem-and present a mixed integer linear program to solve it. We demonstrate that the shortest hyperpaths computed in signaling hypergraphs are far more informative than shortest paths, Steiner trees, and subnetworks containing many short paths found in corresponding graph representations. Our results illustrate the potential of signaling hypergraphs as an improved representation of signaling pathways and motivate the development of novel hypergraph algorithms.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Yoo:2017:IIK, author = "Boyoung Yoo and Fazle Elahi Faisal and Huili Chen and Tijana Milenkovic", title = "Improving Identification of Key Players in Aging via Network De-Noising and Core Inference", journal = j-TCBB, volume = "14", number = "5", pages = "1056--1069", month = sep, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2495170", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Current ``ground truth'' knowledge about human aging has been obtained by transferring aging-related knowledge from well-studied model species via sequence homology or by studying human gene expression data. Since proteins function by interacting with each other, analyzing protein-protein interaction PPI networks in the context of aging is promising. 
Unlike existing static network research of aging, since cellular functioning is dynamic, we recently integrated the static human PPI network with aging-related gene expression data to form dynamic, age-specific networks. Then, we predicted as key players in aging those proteins whose network topologies significantly changed with age. Since current networks are noisy, here, we use link prediction to de-noise the human network and predict improved key players in aging from the de-noised data. Indeed, de-noising gives more significant overlap between the predicted data and the ``ground truth'' aging-related data. Yet, we obtain novel predictions, which we validate in the literature. Also, we improve the predictions by an alternative strategy: removing ``redundant'' edges from the age-specific networks and using the resulting age-specific network ``cores'' to study aging. We produce new knowledge from dynamic networks encompassing multiple data types, via network de-noising or core inference, complementing the existing knowledge obtained from sequence or expression data.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Azofeifa:2017:AAA, author = "Joseph G. Azofeifa and Mary A. Allen and Manuel E. Lladser and Robin D. Dowell", title = "An Annotation Agnostic Algorithm for Detecting Nascent {RNA} Transcripts in {GRO-Seq}", journal = j-TCBB, volume = "14", number = "5", pages = "1070--1081", month = sep, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2520919", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We present a fast and simple algorithm to detect nascent RNA transcription in global nuclear run-on sequencing GRO-seq.
GRO-seq is a relatively new protocol that captures nascent transcripts from actively engaged polymerase, providing a direct read-out on bona fide transcription. Most traditional assays, such as RNA-seq, measure steady state RNA levels which are affected by transcription, post-transcriptional processing, and RNA stability. GRO-seq data, however, presents unique analysis challenges that are only beginning to be addressed. Here, we describe a new algorithm, Fast Read Stitcher FStitch, that takes advantage of two popular machine-learning techniques, hidden Markov models and logistic regression, to classify which regions of the genome are transcribed. Given a small user-defined training set, our algorithm is accurate, robust to varying read depth, annotation agnostic, and fast. Analysis of GRO-seq data without a priori need for annotation uncovers surprising new insights into several aspects of the transcription process.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Goodacre:2017:PND, author = "Norman Goodacre and Nathan Edwards and Mark Danielsen and Peter Uetz and Cathy Wu", title = "Predicting {nsSNPs} that Disrupt Protein-Protein Interactions Using Docking", journal = j-TCBB, volume = "14", number = "5", pages = "1082--1093", month = sep, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2520931", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The human genome contains a large number of protein polymorphisms due to individual genome variation. How many of these polymorphisms lead to altered protein-protein interaction is unknown. We have developed a method to address this question. 
The intersection of the SKEMPI database of affinity constants among interacting proteins and CAPRI 4.0 docking benchmark was docked using HADDOCK, leading to a training set of 166 mutant pairs. A random forest classifier based on the differences in resulting docking scores between the 166 mutant pairs and their wild-types was used, to distinguish between variants that have either completely or partially lost binding ability. Fifty percent of non-binders were correctly predicted with a false discovery rate of only 2 percent. The model was tested on a set of 15 HIV-1 --- human, as well as seven human-human glioblastoma-related, mutant protein pairs: 50 percent of combined non-binders were correctly predicted with a false discovery rate of 10 percent. The model was also used to identify 10 protein-protein interactions between human proteins and their HIV-1 partners that are likely to be abolished by rare non-synonymous single-nucleotide polymorphisms nsSNPs. These nsSNPs may represent novel and potentially therapeutically-valuable targets for anti-viral therapy by disruption of viral binding.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{He:2017:IIP, author = "Dan He and Zhanyong Wang and Laxmi Parida and Eleazar Eskin", title = "{IPED2}: Inheritance Path Based Pedigree Reconstruction Algorithm for Complicated Pedigrees", journal = j-TCBB, volume = "14", number = "5", pages = "1094--1103", month = sep, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2688439", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Reconstruction of family trees, or pedigree reconstruction, for a group of individuals is a fundamental problem in genetics.
The problem is known to be NP-hard even for datasets known to only contain siblings. Some recent methods have been developed to accurately and efficiently reconstruct pedigrees. These methods, however, still consider relatively simple pedigrees, for example, they are not able to handle half-sibling situations where a pair of individuals only share one parent. In this work, we propose an efficient method, IPED2, based on our previous work, which specifically targets reconstruction of complicated pedigrees that include half-siblings. We note that the presence of half-siblings makes the reconstruction problem significantly more challenging which is why previous methods exclude the possibility of half-siblings. We proposed a novel model as well as an efficient graph algorithm and experiments show that our algorithm achieves relatively accurate reconstruction. To our knowledge, this is the first method that is able to handle pedigree reconstruction from genotype data when half-sibling exists in any generation of the pedigree.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Huang:2017:GES, author = "De-Shuang Huang and Vitoantonio Bevilacqua and M. Michael Gromiha", title = "Guest Editorial for Special Section on the {11th International Conference on Intelligent Computing ICIC}", journal = j-TCBB, volume = "14", number = "5", pages = "1104--1105", month = sep, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2677098", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The papers in this special section were presented at the 11th International Conference on Intelligent Computing ICIC held in Fuzhou, China, on August 20-23, 2015. 
This conference was formed to provide an annual forum dedicated to the emerging and challenging topics in artificial intelligence, machine learning, bioinformatics, etc. It aims to bring together researchers and practitioners from both academia and industry to share ideas, problems, and solutions related to the multifaceted aspects of intelligent computing.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wu:2017:DCR, author = "Hongjie Wu and Kun Wang and Liyao Lu and Yu Xue and Qiang Lyu and Min Jiang", title = "Deep Conditional Random Field Approach to Transmembrane Topology Prediction and Application to {GPCR} Three-Dimensional Structure Modeling", journal = j-TCBB, volume = "14", number = "5", pages = "1106--1114", month = sep, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2602872", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Transmembrane proteins play important roles in cellular energy production, signal transmission, and metabolism. Many shallow machine learning methods have been applied to transmembrane topology prediction, but the performance was limited by the large size of membrane proteins and the complex biological evolution information behind the sequence. In this paper, we proposed a novel deep approach based on conditional random fields named as dCRF-TM for predicting the topology of transmembrane proteins. Conditional random fields take into account more complicated interrelation between residue labels in full-length sequence than HMM and SVM-based methods. Three widely-used datasets were employed in the benchmark. DCRF-TM had the accuracy 95 percent over helix location prediction and the accuracy 78 percent over helix number prediction. 
DCRF-TM demonstrated a more robust performance on large size proteins ({$>$350} residues) against 11 state-of-the-art predictors. Further dCRF-TM was applied to ab initio modeling three-dimensional structures of seven-transmembrane receptors, also known as G protein-coupled receptors. The predictions on 24 solved G protein-coupled receptors and unsolved vasopressin V2 receptor illustrated that dCRF-TM helped abGPCR-I-TASSER to improve TM-score 34.3 percent rather than using the random transmembrane definition. Two out of five predicted models caught the experimental verified disulfide bonds in vasopressin V2 receptor.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ge:2017:CSD, author = "Shu-Guang Ge and Junfeng Xia and Wen Sha and Chun-Hou Zheng", title = "Cancer Subtype Discovery Based on Integrative Model of Multigenomic Data", journal = j-TCBB, volume = "14", number = "5", pages = "1115--1121", month = sep, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2621769", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "One major goal of large-scale cancer omics study is to understand molecular mechanisms of cancer and find new biomedical targets. To deal with the high-dimensional multidimensional cancer omics data DNA methylation, mRNA expression, etc., which can be used to discover new insight on identifying cancer subtypes, clustering methods are usually used to find an effective low-dimensional subspace of the original data and then cluster cancer samples in the reduced subspace. However, due to data-type diversity and big data volume, few methods can integrate these data and map them into an effective low-dimensional subspace.
In this paper, we develop a dimension-reduction and data-integration method for identifying cancer subtypes, named Scluster. First, Scluster, respectively, projects the different original data into the principal subspaces by an adaptive sparse reduced-rank regression method. Then, a fused patient-by-patient network is obtained for these subgroups through a scaled exponential similarity kernel method. Finally, candidate cancer subtypes are identified using spectral clustering method. We demonstrate the efficiency of our Scluster method using three cancers by jointly analyzing mRNA expression, miRNA expression, and DNA methylation data. The evaluation results and analyses show that Scluster is effective for predicting survival and identifies novel cancer subtypes of large-scale multi-omics data.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Bao:2017:CPS, author = "Wenzheng Bao and Dong Wang and Yuehui Chen", title = "Classification of Protein Structure Classes on Flexible Neutral Tree", journal = j-TCBB, volume = "14", number = "5", pages = "1122--1133", month = sep, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2610967", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Accurate classification on protein structural is playing an important role in Bioinformatics. An increase in evidence demonstrates that a variety of classification methods have been employed in such a field. In this research, the features of amino acids composition, secondary structure's feature, and correlation coefficient of amino acid dimers and amino acid triplets have been used.
Flexible neutral tree FNT, a particular tree structure neutral network, has been employed as the classification model in the protein structures' classification framework. Considering different feature groups owing diverse roles in the model, impact factors of different groups have been put forward in this research. In order to evaluate different impact factors, Impact Factors Scaling IFS algorithm, which aim at reducing redundant information of the selected features in some degree, have been put forward. To examine the performance of such framework, the 640, 1189, and ASTRAL datasets are employed as the low-homology protein structure benchmark datasets. Experimental results demonstrate that the performance of the proposed method is better than the other methods in the low-homology protein tertiary structures.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Chen:2017:ECR, author = "Qingfeng Chen and Chaowang Lan and Baoshan Chen and Lusheng Wang and Jinyan Li and Chengqi Zhang", title = "Exploring Consensus {RNA} Substructural Patterns Using Subgraph Mining", journal = j-TCBB, volume = "14", number = "5", pages = "1134--1146", month = sep, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2645202", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Frequently recurring RNA structural motifs play important roles in RNA folding process and interaction with other molecules. Traditional index-based and shape-based schemas are useful in modeling RNA secondary structures but ignore the structural discrepancy of individual RNA family member.
Further, the in-depth analysis of underlying substructure pattern is insufficient due to varied and unnormalized substructure data. This prevents us from understanding RNAs functions and their inherent synergistic regulation networks. This article thus proposes a novel labeled graph-based algorithm RnaGraph to uncover frequently RNA substructure patterns. Attribute data and graph data are combined to characterize diverse substructures and their correlations, respectively. Further, a top-k graph pattern mining algorithm is developed to extract interesting substructure motifs by integrating frequency and similarity. The experimental results show that our methods assist in not only modelling complex RNA secondary structures but also identifying hidden but interesting RNA substructure patterns.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Deng:2017:ISK, author = "Su-Ping Deng and Shaolong Cao and De-Shuang Huang and Yu-Ping Wang", title = "Identifying Stages of Kidney Renal Cell Carcinoma by Combining Gene Expression and {DNA} Methylation Data", journal = j-TCBB, volume = "14", number = "5", pages = "1147--1153", month = sep, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2607717", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In this study, in order to take advantage of complementary information from different types of data for better disease status diagnosis, we combined gene expression with DNA methylation data and generated a fused network, based on which the stages of Kidney Renal Cell Carcinoma KIRC can be better identified. It is well recognized that a network is important for investigating the connectivity of disease groups. 
We exploited the potential of the network's features to identify the KIRC stage. We first constructed a patient network from each type of data. We then built a fused network based on network fusion method. Based on the link weights of patients, we used a generalized linear model to predict the group of KIRC subjects. Finally, the group prediction method was applied to test the power of network-based features. The performance e.g., the accuracy of identifying cancer stages when using the fused network from two types of data is shown to be superior to that when using two patient networks from only one data type. The work provides a good example for using network based features from multiple data types for a more comprehensive diagnosis.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Yuan:2017:NPB, author = "Lin Yuan and Lin Zhu and Wei-Li Guo and Xiaobo Zhou and Youhua Zhang and Zhenhua Huang and De-Shuang Huang", title = "Nonconvex Penalty Based Low-Rank Representation and Sparse Regression for {eQTL} Mapping", journal = j-TCBB, volume = "14", number = "5", pages = "1154--1164", month = sep, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2609420", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This paper addresses the problem of accounting for confounding factors and expression quantitative trait loci eQTL mapping in the study of SNP-gene associations. The existing convex penalty based algorithm has limited capacity to keep main information of matrix in the process of reducing matrix rank. We present an algorithm, which use nonconvex penalty based low-rank representation to account for confounding factors and make use of sparse regression for eQTL mapping NCLRS. 
The efficiency of the presented algorithm is evaluated by comparing the results of 18 synthetic datasets given by NCLRS and presented algorithm, respectively. The experimental results or biological dataset show that our approach is an effective tool to account for non-genetic effects than currently existing methods.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Li:2017:PSP, author = "Jian-Qiang Li and Zhu-Hong You and Xiao Li and Zhong Ming and Xing Chen", title = "{PSPEL}: In Silico Prediction of Self-Interacting Proteins from Amino Acids Sequences Using Ensemble Learning", journal = j-TCBB, volume = "14", number = "5", pages = "1165--1172", month = sep, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2649529", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Self interacting proteins SIPs play an important role in various aspects of the structural and functional organization of the cell. Detecting SIPs is one of the most important issues in current molecular biology. Although a large number of SIPs data has been generated by experimental methods, wet laboratory approaches are both time-consuming and costly. In addition, they yield high false negative and positive rates. Thus, there is a great need for in silico methods to predict SIPs accurately and efficiently. In this study, a new sequence-based method is proposed to predict SIPs. The evolutionary information contained in Position-Specific Scoring Matrix PSSM is extracted from of protein with known sequence. Then, features are fed to an ensemble classifier to distinguish the self-interacting and non-self-interacting proteins. 
When performed on Saccharomyces cerevisiae and Human SIPs data sets, the proposed method can achieve high accuracies of 86.86 and 91.30 percent, respectively. Our method also shows a good performance when compared with the SVM classifier and previous methods. Consequently, the proposed method can be considered to be a novel promising tool to predict SIPs.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wang:2017:HBH, author = "Pu Wang and Ruiquan Ge and Xuan Xiao and Manli Zhou and Fengfeng Zhou", title = "{hMuLab}: a Biomedical Hybrid {MUlti-LABel} Classifier Based on Multiple Linear Regression", journal = j-TCBB, volume = "14", number = "5", pages = "1173--1180", month = sep, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2603507", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Many biomedical classification problems are multi-label by nature, e.g., a gene involved in a variety of functions and a patient with multiple diseases. The majority of existing classification algorithms assumes each sample with only one class label, and the multi-label classification problem remains to be a challenge for biomedical researchers. This study proposes a novel multi-label learning algorithm, hMuLab, by integrating both feature-based and neighbor-based similarity scores. The multiple linear regression modeling techniques make hMuLab capable of producing multiple label assignments for a query sample. 
The comparison results over six commonly-used multi-label performance measurements suggest that hMuLab performs accurately and stably for the biomedical datasets, and may serve as a complement to the existing literature.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Muki-Marttunen:2017:AMB, author = "Tuomo M{\"a}ki-Marttunen", title = "An Algorithm for Motif-Based Network Design", journal = j-TCBB, volume = "14", number = "5", pages = "1181--1186", month = sep, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2576442", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A determinant property of the structure of a biological network is the distribution of local connectivity patterns, i.e., network motifs. In this work, a method for creating directed, unweighted networks while promoting a certain combination of motifs is presented. This motif-based network algorithm starts with an empty graph and randomly connects the nodes by advancing or discouraging the formation of chosen motifs. The in- or out-degree distribution of the generated networks can be explicitly chosen. The algorithm is shown to perform well in producing networks with high occurrences of the targeted motifs, both ones consisting of three nodes as well as ones consisting of four nodes. Moreover, the algorithm can also be tuned to bring about global network characteristics found in many natural networks, such as small-worldness and modularity.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ma:2017:EBS, author = "Lichun Ma and Debby D.
Wang and Bin Zou and Hong Yan", title = "An Eigen-Binding Site Based Method for the Analysis of Anti-{EGFR} Drug Resistance in Lung Cancer Treatment", journal = j-TCBB, volume = "14", number = "5", pages = "1187--1194", month = sep, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2568184", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We explore the drug resistance mechanism in non-small cell lung cancer treatment by characterizing the drug-binding site of a protein mutant based on local surface and energy features. These features are transformed to an eigen-binding site space and used for drug resistance level prediction and analysis.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{dAcierno:2017:IID, author = "Antonio d'Acierno", title = "{IsAProteinDB}: an Indexed Database of Trypsinized Proteins for Fast Peptide Mass Fingerprinting", journal = j-TCBB, volume = "14", number = "5", pages = "1195--1201", month = sep, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2564964", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In peptide mass fingerprinting, an unknown protein is fragmented into smaller peptides whose masses are accurately measured; the obtained list of weights is then compared with a reference database to obtain a set of matching proteins.
The exponential growth of known proteins discourages the use of brute force methods, where the weights' list is compared with each protein in the reference collection; luckily, the scientific literature in the database field highlights that well designed searching algorithms, coupled with a proper data organization, allow to quickly solve the identification problem even on standard desktop computers. In this paper, IsAProteinDB, an indexed database of trypsinized proteins, is presented. The corresponding search algorithm shows a time complexity that does not significantly depend on the size of the reference protein database.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Kawam:2017:SSH, author = "Ahmad {Al Kawam} and Sunil Khatri and Aniruddha Datta", title = "A Survey of Software and Hardware Approaches to Performing Read Alignment in Next Generation Sequencing", journal = j-TCBB, volume = "14", number = "6", pages = "1202--1213", month = nov, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2586070", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Computational genomics is an emerging field that is enabling us to reveal the origins of life and the genetic basis of diseases such as cancer. Next Generation Sequencing (NGS) technologies have unleashed a wealth of genomic information by producing immense amounts of raw data. Before any functional analysis can be applied to this data, read alignment is applied to find the genomic coordinates of the produced sequences. Alignment algorithms have evolved rapidly with the advancement in sequencing technology, striving to achieve biological accuracy at the expense of increasing space and time complexities.
Hardware approaches have been proposed to accelerate the computational bottlenecks created by the alignment process. Although several hardware approaches have achieved remarkable speedups, most have overlooked important biological features, which have hampered their widespread adoption by the genomics community. In this paper, we provide a brief biological introduction to genomics and NGS. We discuss the most popular next generation read alignment tools and algorithms. Furthermore, we provide a comprehensive survey of the hardware implementations used to accelerate these algorithms.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Sirin:2017:BMT, author = "Utku Sirin and Faruk Polat and Reda Alhajj", title = "Batch Mode {TD$ (\lambda) $} for Controlling Partially Observable Gene Regulatory Networks", journal = j-TCBB, volume = "14", number = "6", pages = "1214--1227", month = nov, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2595577", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "External control of gene regulatory networks (GRNs) has received much attention in recent years. The aim is to find a series of actions to apply to a gene regulation system making it avoid its diseased states. In this work, we propose a novel method for controlling partially observable GRNs combining batch mode reinforcement learning (Batch RL) and TD$ (\lambda) $ algorithms.
Unlike the existing studies inferring a computational model from gene expression data, and obtaining a control policy over the constructed model, our idea is to interpret the time series gene expression data as a sequence of observations that the system produced, and obtain an approximate stochastic policy directly from the gene expression data without estimation of the internal states of the partially observable environment. Thereby, we get rid of the most time consuming phases of the existing studies, inferring a model and running the model for the control. Results show that our method is able to provide control solutions for regulation systems of several thousands of genes only in seconds, whereas existing studies cannot solve control problems of even a few dozens of genes. Results also show that our approximate stochastic policies are almost as good as the policies generated by the existing studies.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{L:2017:BDW, author = "Biji C. L. and Achuthsankar S. Nair", title = "Benchmark Dataset for Whole Genome Sequence Compression", journal = j-TCBB, volume = "14", number = "6", pages = "1228--1236", month = nov, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2568186", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/datacompression.bib; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The research in DNA data compression lacks a standard dataset to test out compression tools specific to DNA. 
This paper argues that the current state of achievement in DNA compression is unable to be benchmarked in the absence of such scientifically compiled whole genome sequence dataset and proposes a benchmark dataset using multistage sampling procedure. Considering the genome sequence of organisms available in the National Centre for Biotechnology and Information NCBI as the universe, the proposed dataset selects 1,105 prokaryotes, 200 plasmids, 164 viruses, and 65 eukaryotes. This paper reports the results of using three established tools on the newly compiled dataset and show that their strength and weakness are evident only with a comparison based on the scientifically compiled benchmark dataset. Availability: The sample dataset and the respective links are available at \url{https://sourceforge.net/projects/benchmarkdnacompressiondataset/}.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{doNascimento:2017:CNV, author = "Francisco do Nascimento and Katia S. Guimaraes", title = "Copy Number Variations Detection: Unravelling the Problem in Tangible Aspects", journal = j-TCBB, volume = "14", number = "6", pages = "1237--1250", month = nov, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2576441", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In the midst of the important genomic variants associated to the susceptibility and resistance to complex diseases, Copy Number Variations CNV has emerged as a prevalent class of structural variation. Following the flood of next-generation sequencing data, numerous tools publicly available have been developed to provide computational strategies to identify CNV at improved accuracy. 
This review goes beyond scrutinizing the main approaches widely used for structural variants detection in general, including Split-Read, Paired-End Mapping, Read-Depth, and Assembly-based. In this paper, (1) we characterize the relevant technical details around the detection of CNV, which can affect the estimation of breakpoints and number of copies, (2) we pinpoint the most important insights related to GC-content and mappability biases, and (3) we discuss the paramount caveats in the tools evaluation process. The points brought out in this study emphasize common assumptions, a variety of possible limitations, valuable insights, and directions for desirable contributions to the state-of-the-art in CNV detection tools.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ceri:2017:DMH, author = "Stefano Ceri and Abdulrahman Kaitoua and Marco Masseroli and Pietro Pinoli and Francesco Venco", title = "Data Management for Heterogeneous Genomic Datasets", journal = j-TCBB, volume = "14", number = "6", pages = "1251--1264", month = nov, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2576447", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Next Generation Sequencing (NGS), a family of technologies for reading DNA and RNA, is changing biological research, and will soon change medical practice, by quickly providing sequencing data and high-level features of numerous individual genomes in different biological and clinical conditions. The availability of millions of whole genome sequences may soon become the biggest and most important ``big data'' problem of mankind.
In this exciting framework, we recently proposed a new paradigm to raise the level of abstraction in NGS data management, by introducing a GenoMetric Query Language GMQL and demonstrating its usefulness through several biological query examples. Leveraging on that effort, here we motivate and formalize GMQL operations, especially focusing on the most characteristic and domain-specific ones. Furthermore, we address their efficient implementation and illustrate the architecture of the new software system that we have developed for their execution on big genomic data in a cloud computing environment, providing the evaluation of its performance. The new system implementation is available for download at the GMQL website http://www.bioinformatics.deib.polimi.it/GMQL/; GMQL can also be tested through a set of predefined queries on ENCODE and Roadmap Epigenomics data at http://www.bioinformatics.deib.polimi.it/GMQL/queries/.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Adl:2017:DPI, author = "Amin Ahmadi Adl and Hye-Seung Lee and Xiaoning Qian", title = "Detecting Pairwise Interactive Effects of Continuous Random Variables for Biomarker Identification with Small Sample Size", journal = j-TCBB, volume = "14", number = "6", pages = "1265--1275", month = nov, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2586042", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Aberrant changes to interactions among cellular components have been conjectured to be potential causes of abnormalities in cellular functions. 
By systematic analysis of high-throughput-omics data, researchers hope to detect potential associations among measured variables for better biomarker identification and phenotype prediction. In this paper, we focus on the methods to measure pairwise interactive effects among continuous random variables, representing molecular expressions, with respect to a given categorical outcome. Together with a comprehensive review on the existing measures, we further propose new measures that better estimate interactive effects, especially in small sample size scenarios. We first evaluate the performance of the existing and new methods for both small and large sample sizes based on simulated datasets that shows our proposed methods outperform previous methods in general. The best performing method for small sample size scenarios suggested by simulation experiments is then implemented to estimate interactive effects among genes with respect to the metastasis outcome in two breast cancer studies based on micro-array gene expression datasets. 
Our results further demonstrate that integrating detected interactive effects together with individual effects can help in finding more accurate biomarkers for breast cancer metastasis, which are indeed involved in important pathways related to cancer metastasis based on gene set enrichment analysis.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Grewal:2017:EAO, author = "Nivit Grewal and Shailendra Singh and Trilok Chand", title = "Effect of Aggregation Operators on Network-Based Disease Gene Prioritization: a Case Study on Blood Disorders", journal = j-TCBB, volume = "14", number = "6", pages = "1276--1287", month = nov, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2599155", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Owing to the innate noise in the biological data sources, a single source or a single measure do not suffice for an effective disease gene prioritization. So, the integration of multiple data sources or aggregation of multiple measures is the need of the hour. The aggregation operators combine multiple related data values to a single value such that the combined value has the effect of all the individual values. In this paper, an attempt has been made for applying the fuzzy aggregation on the network-based disease gene prioritization and investigate its effect under noise conditions. This study has been conducted for a set of 15 blood disorders by fusing four different network measures, computed from the protein interaction network, using a selected set of aggregation operators and ranking the genes on the basis of the aggregated value. 
The aggregation operator-based rankings have been compared with the ``Random walk with restart'' gene prioritization method. The impact of noise has also been investigated by adding varying proportions of noise to the seed set. The results reveal that for all the selected blood disorders, the Mean of Maximal operator has relatively outperformed the other aggregation operators for noisy as well as non-noisy data.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhang:2017:EPC, author = "Gui-Jun Zhang and Xiao-Gen Zhou and Xu-Feng Yu and Xiao-Hu Hao and Li Yu", title = "Enhancing Protein Conformational Space Sampling Using Distance Profile-Guided Differential Evolution", journal = j-TCBB, volume = "14", number = "6", pages = "1288--1301", month = nov, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2566617", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "De novo protein structure prediction aims to search for low-energy conformations as it follows the thermodynamics hypothesis that places native conformations at the global minimum of the protein energy surface. However, the native conformation is not necessarily located in the lowest-energy regions owing to the inaccuracies of the energy model. This study presents a differential evolution algorithm using distance profile-based selection strategy to sample conformations with reasonable structure effectively. In the proposed algorithm, besides energy, the residue-residue distance is considered another measure of the conformation. 
The average distance errors of decoys between the distance of each residue pair and the corresponding distance in the distance profiles are first calculated when the trial conformation yields a larger energy value than that of the target. Then, the distance acceptance probability of the trial conformation is designed based on distance profiles if the trial conformation obtains a lower average distance error compared with that of the target conformation. The trial conformation is accepted to the next generation in accordance with its distance acceptance probability. By using the dual constraints of energy and distance in guiding sampling, the algorithm can sample conformations with lower energies and more reasonable structures. Experimental results of 28 benchmark proteins show that the proposed algorithm can effectively predict near-native protein structures.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Aparicio:2017:EAG, author = "David Aparicio and Pedro Ribeiro and Fernando Silva", title = "Extending the Applicability of Graphlets to Directed Networks", journal = j-TCBB, volume = "14", number = "6", pages = "1302--1315", month = nov, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2586046", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "With recent advances in high-throughput cell biology, the amount of cellular biological data has grown drastically. Such data is often modeled as graphs also called networks and studying them can lead to new insights into molecule-level organization. A possible way to understand their structure is by analyzing the smaller components that constitute them, namely network motifs and graphlets. 
Graphlets are particularly well suited to compare networks and to assess their level of similarity due to the rich topological information that they offer but are almost always used as small undirected graphs of up to five nodes, thus limiting their applicability in directed networks. However, a large set of interesting biological networks such as metabolic, cell signaling, or transcriptional regulatory networks are intrinsically directional, and using metrics that ignore edge direction may gravely hinder information extraction. Our main purpose in this work is to extend the applicability of graphlets to directed networks by considering their edge direction, thus providing a powerful basis for the analysis of directed biological networks. We tested our approach on two network sets, one composed of synthetic graphs and another of real directed biological networks, and verified that they were more accurately grouped using directed graphlets than undirected graphlets. It is also evident that directed graphlets offer substantially more topological information than simple graph metrics such as degree distribution or reciprocity. However, enumerating graphlets in large networks is a computationally demanding task. Our implementation addresses this concern by using a state-of-the-art data structure, the g-trie, which is able to greatly reduce the necessary computation. We compared our tool to other state-of-the-art methods and verified that it is the fastest general tool for graphlet counting.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Stegmayer:2017:HCI, author = "Georgina Stegmayer and Cristian Yones and Laura Kamenetzky and Diego H.
Milone", title = "High Class-Imbalance in pre-{miRNA} Prediction: a Novel Approach Based on {deepSOM}", journal = j-TCBB, volume = "14", number = "6", pages = "1316--1326", month = nov, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2576459", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The computational prediction of novel microRNA within a full genome involves identifying sequences having the highest chance of being a miRNA precursor pre-miRNA. These sequences are usually named candidates to miRNA. The well-known pre-miRNAs are usually only a few in comparison to the hundreds of thousands of potential candidates to miRNA that have to be analyzed, which makes this task a high class-imbalance classification problem. The classical way of approaching it has been training a binary classifier in a supervised manner, using well-known pre-miRNAs as positive class and artificially defining the negative class. However, although the selection of positive labeled examples is straightforward, it is very difficult to build a set of negative examples in order to obtain a good set of training samples for a supervised method. In this work, we propose a novel and effective way of approaching this problem using machine learning, without the definition of negative examples. The proposal is based on clustering unlabeled sequences of a genome together with well-known miRNA precursors for the organism under study, which allows for the quick identification of the best candidates to miRNA as those sequences clustered with known precursors. Furthermore, we propose a deep model to overcome the problem of having very few positive class labels. They are always maintained in the deep levels as positive class while less likely pre-miRNA sequences are filtered level after level. 
Our approach has been compared with other methods for pre-miRNAs prediction in several species, showing effective predictivity of novel miRNAs. Additionally, we will show that our approach has a lower training time and allows for a better graphical navigability and interpretation of the results. A web-demo interface to try deepSOM is available at http://fich.unl.edu.ar/sinc/web-demo/deepsom/.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Akkasi:2017:IBN, author = "Abbas Akkasi and Ekrem Varoglu", title = "Improving Biochemical Named Entity Recognition Using {PSO} Classifier Selection and {Bayesian} Combination Methods", journal = j-TCBB, volume = "14", number = "6", pages = "1327--1338", month = nov, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2570216", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Named Entity Recognition (NER) is a basic step for large number of consequent text mining tasks in the biochemical domain. Increasing the performance of such recognition systems is of high importance and always poses a challenge. In this study, a new community based decision making system is proposed which aims at increasing the efficiency of NER systems in the chemical/drug name context. Particle Swarm Optimization (PSO) algorithm is chosen as the expert selection strategy along with the Bayesian combination method to merge the outputs of the selected classifiers as well as evaluate the fitness of the selected candidates. The proposed system performs in two steps. The first step focuses on creating various numbers of baseline classifiers for NER with different features sets using the Conditional Random Fields (CRFs).
The second step involves the selection and efficient combination of the classifiers using PSO and Bayesian combination. Two comprehensive corpora from BioCreative events, namely ChemDNER and CEMP, are used for the experiments conducted. Results show that the ensemble of classifiers selected by means of the proposed approach perform better than the single best classifier as well as ensembles formed using other popular selection/combination strategies for both corpora. Furthermore, the proposed method outperforms the best performing system at the BioCreative IV ChemDNER track by achieving an F-score of 87.95 percent.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Bittig:2017:MSH, author = "Arne T. Bittig and Adelinde M. Uhrmacher", title = "{ML-Space}: Hybrid Spatial {Gillespie} and Particle Simulation of Multi-Level Rule-Based Models in Cell Biology", journal = j-TCBB, volume = "14", number = "6", pages = "1339--1349", month = nov, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2598162", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Spatio-temporal dynamics of cellular processes can be simulated at different levels of detail, from deterministic partial differential equations via the spatial Stochastic Simulation algorithm to tracking Brownian trajectories of individual particles. We present a spatial simulation approach for multi-level rule-based models, which includes dynamically hierarchically nested cellular compartments and entities. Our approach (ML-Space) combines discrete compartmental dynamics, stochastic spatial approaches in discrete space, and particles moving in continuous space.
The rule-based specification language of ML-Space supports concise and compact descriptions of models and to adapt the spatial resolution of models easily.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Kang:2017:MBB, author = "Mingon Kang and Juyoung Park and Dong-Chul Kim and Ashis K. Biswas and Chunyu Liu and Jean Gao", title = "Multi-Block Bipartite Graph for Integrative Genomic Analysis", journal = j-TCBB, volume = "14", number = "6", pages = "1350--1358", month = nov, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2591521", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Human diseases involve a sequence of complex interactions between multiple biological processes. In particular, multiple genomic data such as Single Nucleotide Polymorphism SNP, Copy Number Variation CNV, DNA Methylation DM, and their interactions simultaneously play an important role in human diseases. However, despite the widely known complex multi-layer biological processes and increased availability of the heterogeneous genomic data, most research has considered only a single type of genomic data. Furthermore, recent integrative genomic studies for the multiple genomic data have also been facing difficulties due to the high-dimensionality and complexity, especially when considering their intra- and inter-block interactions. In this paper, we introduce a novel multi-block bipartite graph and its inference methods, MB2I and sMB2I, for the integrative genomic study. The proposed methods not only integrate multiple genomic data but also incorporate intra/inter-block interactions by using a multi-block bipartite graph. 
In addition, the methods can be used to predict quantitative traits e.g., gene expression, survival time from the multi-block genomic data. The performance was assessed by simulation experiments that implement practical situations. We also applied the method to the human brain data of psychiatric disorders. The experimental results were analyzed by maximum edge biclique and biclustering, and biological findings were discussed.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Weyenberg:2017:NKB, author = "Grady Weyenberg and Ruriko Yoshida and Daniel Howe", title = "Normalizing Kernels in the {Billera--Holmes--Vogtmann} Treespace", journal = j-TCBB, volume = "14", number = "6", pages = "1359--1365", month = nov, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2565475", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "As costs of genome sequencing have dropped precipitously, development of efficient bioinformatic methods to analyze genome structure and evolution have become ever more urgent. For example, most published phylogenomic studies involve either massive concatenation of sequences, or informal comparisons of phylogenies inferred on a small subset of orthologous genes, neither of which provides a comprehensive overview of evolution or systematic identification of genes with unusual and interesting evolution e.g., horizontal gene transfers, gene duplication, and subsequent neofunctionalization. We are interested in identifying such ``outlying'' gene trees from the set of gene trees and estimating the distribution of trees over the ``tree space''. 
This paper describes an improvement to the kdetrees algorithm, an adaptation of classical kernel density estimation to the metric space of phylogenetic trees (Billera--Holmes--Vogtmann treespace), whereby the kernel normalizing constants are estimated through the use of the novel holonomic gradient methods. As in the original kdetrees paper, we have applied kdetrees to a set of Apicomplexa genes. The analysis identified several unreliable sequence alignments that had escaped previous detection, as well as a gene independently reported as a possible case of horizontal gene transfer. The updated version of the kdetrees software package is available both from CRAN (the official R package system), as well as from the official development repository on GitHub: github.com/grady/kdetrees.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ding:2017:NMM, author = "Yuchun Ding and Marie Christine Pardon and Alessandra Agostini and Henryk Faas and Jinming Duan and Wil O. C. Ward and Felicity Easton and Dorothee Auer and Li Bai", title = "Novel Methods for Microglia Segmentation, Feature Extraction, and Classification", journal = j-TCBB, volume = "14", number = "6", pages = "1366--1377", month = nov, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2591520", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Segmentation and analysis of histological images provides a valuable tool to gain insight into the biology and function of microglial cells in health and disease. Common image segmentation methods are not suitable for inhomogeneous histology image analysis and accurate classification of microglial activation states has remained a challenge.
In this paper, we introduce an automated image analysis framework capable of efficiently segmenting microglial cells from histology images and analyzing their morphology. The framework makes use of variational methods and the fast-split Bregman algorithm for image denoising and segmentation, and of multifractal analysis for feature extraction to classify microglia by their activation states. Experiments show that the proposed framework is accurate and scalable to large datasets and provides a useful tool for the study of microglial biology.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Savel:2017:PEL, author = "Daniel Savel and Thomas LaFramboise and Ananth Grama and Mehmet Koyuturk", title = "{Pluribus} --- Exploring the Limits of Error Correction Using a Suffix Tree", journal = j-TCBB, volume = "14", number = "6", pages = "1378--1388", month = nov, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2586060", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Next generation sequencing technologies enable efficient and cost-effective genome sequencing. However, sequencing errors increase the complexity of the de novo assembly process, and reduce the quality of the assembled sequences. Many error correction techniques utilizing substring frequencies have been developed to mitigate this effect. In this paper, we present a novel and effective method called Pluribus, for correcting sequencing errors using a generalized suffix trie. Pluribus utilizes multiple manifestations of an error in the trie to accurately identify errors and suggest corrections. 
We show that Pluribus produces the least number of false positives across a diverse set of real sequencing datasets when compared to other methods. Furthermore, Pluribus can be used in conjunction with other contemporary error correction methods to achieve higher levels of accuracy than either tool alone. These increases in error correction accuracy are also realized in the quality of the contigs that are generated during assembly. We explore, in-depth, the behavior of Pluribus , to explain the observed improvement in accuracy and assembly performance. Pluribus is freely available at http://compbio.case.edu/pluribus/.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Hu:2017:PPD, author = "Jun Hu and Yang Li and Ming Zhang and Xibei Yang and Hong-Bin Shen and Dong-Jun Yu", title = "Predicting Protein-{DNA} Binding Residues by Weightedly Combining Sequence-Based Features and Boosting Multiple {SVMs}", journal = j-TCBB, volume = "14", number = "6", pages = "1389--1398", month = nov, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2616469", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Protein-DNA interactions are ubiquitous in a wide variety of biological processes. Correctly locating DNA-binding residues solely from protein sequences is an important but challenging task for protein function annotations and drug discovery, especially in the post-genomic era where large volumes of protein sequences have quickly accumulated. In this study, we report a new predictor, named TargetDNA, for targeting protein-DNA binding residues from primary sequences. 
TargetDNA uses a protein's evolutionary information and its predicted solvent accessibility as two base features and employs a centered linear kernel alignment algorithm to learn the weights for weightedly combining the two features. Based on the weightedly combined feature, multiple initial predictors with SVM as classifiers are trained by applying a random under-sampling technique to the original dataset, the purpose of which is to cope with the severe imbalance phenomenon that exists between the number of DNA-binding and non-binding residues. The final ensembled predictor is obtained by boosting the multiple initially trained predictors. Experimental simulation results demonstrate that the proposed TargetDNA achieves a high prediction performance and outperforms many existing sequence-based protein-DNA binding residue predictors. The TargetDNA web server and datasets are freely available at http://csbio.njust.edu.cn/bioinf/TargetDNA/ for academic use.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhong:2017:PII, author = "Jiancheng Zhong and Jianxing Wang and Xiaojun Ding and Zhen Zhang and Min Li and Fang-Xiang Wu and Yi Pan", title = "Protein Inference from the Integration of Tandem {MS} Data and Interactome Networks", journal = j-TCBB, volume = "14", number = "6", pages = "1399--1409", month = nov, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2601618", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Since proteins are digested into a mixture of peptides in the preprocessing step of tandem mass spectrometry (MS), it is difficult to determine which specific protein a shared peptide belongs to.
In recent studies, besides tandem MS data and peptide identification information, some other information is exploited to infer proteins. Different from the methods which first use only tandem MS data to infer proteins and then use network information to refine them, this study proposes a protein inference method named TMSIN, which uses interactome networks directly. As two interacting proteins should co-exist, it is reasonable to assume that if one of the interacting proteins is confidently inferred in a sample, its interacting partners should have a high probability in the same sample, too. Therefore, we can use the neighborhood information of a protein in an interactome network to adjust the probability that the shared peptide belongs to the protein. In TMSIN, a multi-weighted graph is constructed by incorporating the bipartite graph with interactome network information, where the bipartite graph is built with the peptide identification information. Based on multi-weighted graphs, TMSIN adopts an iterative workflow to infer proteins. At each iterative step, the probability that a shared peptide belongs to a specific protein is calculated by using the Bayes' law based on the neighbor protein support scores of each protein which are mapped by the shared peptides. We carried out experiments on yeast data and human data to evaluate the performance of TMSIN in terms of ROC, q-value, and accuracy. The experimental results show that AUC scores yielded by TMSIN are 0.742 and 0.874 in yeast dataset and human dataset, respectively, and TMSIN yields the maximum number of true positives when q-value less than or equal to 0.05. 
The overlap analysis shows that TMSIN is an effective complementary approach for protein inference.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Gu:2017:RGS, author = "Deqing Gu and Xingxing Jian and Cheng Zhang and Qiang Hua", title = "Reframed Genome-Scale Metabolic Model to Facilitate Genetic Design and Integration with Expression Data", journal = j-TCBB, volume = "14", number = "6", pages = "1410--1418", month = nov, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2576456", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Genome-scale metabolic network models (GEMs) have played important roles in the design of genetically engineered strains and helped biologists to decipher metabolism. However, due to the complex gene-reaction relationships that exist in model systems, most algorithms have limited capabilities with respect to directly predicting accurate genetic design for metabolic engineering. In particular, methods that predict reaction knockout strategies leading to overproduction are often impractical in terms of gene manipulations. Recently, we proposed a method named logical transformation of model (LTM) to simplify the gene-reaction associations by introducing intermediate pseudo reactions, which makes it possible to generate genetic design. Here, we propose an alternative method to relieve researchers from deciphering complex gene-reactions by adding pseudo gene controlling reactions. In comparison to LTM, this new method introduces fewer pseudo reactions and generates a much smaller model system named as gModel.
We showed that gModel allows two seldom reported applications: identification of minimal genomes and design of minimal cell factories within a modified OptKnock framework. In addition, gModel could be used to integrate expression data directly and improve the performance of the E-Fmin method for predicting fluxes. In conclusion, the model transformation procedure will facilitate genetic research based on GEMs, extending their applications.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Maji:2017:SFS, author = "Pradipta Maji and Ekta Shah", title = "Significance and Functional Similarity for Identification of Disease Genes", journal = j-TCBB, volume = "14", number = "6", pages = "1419--1433", month = nov, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2598163", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "One of the most significant research issues in functional genomics is insilico identification of disease related genes. In this regard, the paper presents a new gene selection algorithm, termed as SiFS, for identification of disease genes. It integrates the information obtained from interaction network of proteins and gene expression profiles. The proposed SiFS algorithm culls out a subset of genes from microarray data as disease genes by maximizing both significance and functional similarity of the selected gene subset. Based on the gene expression profiles, the significance of a gene with respect to another gene is computed using mutual information. On the other hand, a new measure of similarity is introduced to compute the functional similarity between two genes. 
Information derived from the protein-protein interaction network forms the basis of the proposed SiFS algorithm. The performance of the proposed gene selection algorithm and new similarity measure, is compared with that of other related methods and similarity measures, using several cancer microarray data sets.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Qi:2017:SCM, author = "Zhen Qi and Eberhard O. Voit", title = "Strategies for Comparing Metabolic Profiles: Implications for the Inference of Biochemical Mechanisms from Metabolomics Data", journal = j-TCBB, volume = "14", number = "6", pages = "1434--1445", month = nov, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2586065", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Background: Large amounts of metabolomics data have been accumulated in recent years and await analysis. Previously, we had developed a systems biology approach to infer biochemical mechanisms underlying metabolic alterations observed in cancers and other diseases. The method utilized the typical Euclidean distance for comparing metabolic profiles. Here, we ask whether any of the numerous alternative metrics might serve this purpose better. Methods and Findings: We used enzymatic alterations in purine metabolism that were measured in human renal cell carcinoma to test various metrics with the goal of identifying the best metrics for discerning metabolic profiles of healthy and diseased individuals. The results showed that several metrics have similarly good performance, but that some are unsuited for comparisons of metabolic profiles. 
Furthermore, the results suggest that relative changes in metabolite levels, which reduce bias toward large metabolite concentrations, are better suited for comparisons of metabolic profiles than absolute changes. Finally, we demonstrate that a sequential search for enzymatic alterations, ranked by importance, is not always valid. Conclusions: We identified metrics that are appropriate for comparisons of metabolic profiles. In addition, we constructed strategic guidelines for the algorithmic identification of biochemical mechanisms from metabolomics data.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Mohammadi:2017:TAT, author = "Shahin Mohammadi and David F. Gleich and Tamara G. Kolda and Ananth Grama", title = "Triangular Alignment {TAME}: a Tensor-Based Approach for Higher-Order Network Alignment", journal = j-TCBB, volume = "14", number = "6", pages = "1446--1458", month = nov, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2595583", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Network alignment has extensive applications in comparative interactomics. Traditional approaches aim to simultaneously maximize the number of conserved edges and the underlying similarity of aligned entities. We propose a novel formulation of the network alignment problem that extends topological similarity to higher-order structures and provides a new objective function that maximizes the number of aligned substructures. This objective function corresponds to an integer programming problem, which is NP-hard. Consequently, we identify a closely related surrogate function whose maximization results in a tensor eigenvector problem. 
Based on this formulation, we present an algorithm called Triangular AlignMEnt (TAME), which attempts to maximize the number of aligned triangles across networks. Using a case study on the NAPAbench dataset, we show that triangular alignment is capable of producing mappings with high node correctness. We further evaluate our method by aligning yeast and human interactomes. Our results indicate that TAME outperforms the state-of-art alignment methods in terms of conserved triangles. In addition, we show that the number of conserved triangles is more significantly correlated, compared to the conserved edge, with node correctness and co-expression of edges. Our formulation and resulting algorithms can be easily extended to arbitrary motifs.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Liu:2017:UBM, author = "Yun Liu and Tao Hou and Bing Kang and Fu Liu", title = "Unsupervised Binning of Metagenomic Assembled Contigs Using Improved Fuzzy {C}-Means Method", journal = j-TCBB, volume = "14", number = "6", pages = "1459--1467", month = nov, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2576452", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Metagenomic contigs binning is a necessary step of metagenome analysis. After assembly, the number of contigs belonging to different genomes is usually unequal. So a metagenomic contigs dataset is a kind of imbalanced dataset and traditional fuzzy c-means method (FCM) fails to handle it very well. In this paper, we will introduce an improved version of fuzzy c-means method (IFCM) into metagenomic contigs binning. First, tetranucleotide frequencies are calculated for every contig.
Second, the number of bins is roughly estimated by the distribution of genome lengths of a complete set of non-draft sequenced microbial genomes from NCBI. Then, IFCM is used to cluster DNA contigs with the estimated result. Finally, a clustering validity function is utilized to determine the binning result. We tested this method on a synthetic and two real datasets and experimental results have showed the effectiveness of this method compared with other tools.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Luo:2017:CPD, author = "Jiawei Luo and Pingjian Ding and Cheng Liang and Buwen Cao and Xiangtao Chen", title = "Collective Prediction of Disease-Associated {miRNAs} Based on Transduction Learning", journal = j-TCBB, volume = "14", number = "6", pages = "1468--1475", month = nov, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2599866", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The discovery of human disease-related miRNA is a challenging problem for complex disease biology research. For existing computational methods, it is difficult to achieve excellent performance with sparse known miRNA-disease association verified by biological experiment. Here, we develop CPTL, a Collective Prediction based on Transduction Learning, to systematically prioritize miRNAs related to disease. By combining disease similarity, miRNA similarity with known miRNA-disease association, we construct a miRNA-disease network for predicting miRNA-disease association. Then, CPTL calculates relevance score and updates the network structure iteratively, until a convergence criterion is reached. 
The relevance score of node including miRNA and disease is calculated by the use of transduction learning based on its neighbors. The network structure is updated using relevance score, which increases the weight of important links. To show the effectiveness of our method, we compared CPTL with existing methods based on HMDD datasets. Experimental results indicate that CPTL outperforms existing approaches in terms of AUC, precision, recall, and F1-score. Moreover, experiments performed with different number of iterations verify that CPTL has good convergence. Besides, it is analyzed that the varying of weighted parameters affect predicted results. Case study on breast cancer has further confirmed the identification ability of CPTL.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Dureau:2017:MIA, author = "Maxime Dureau and Angelo Alessandri and Patrizia Bagnerini and Stephane Vincent", title = "Modeling and Identification of Amnioserosa Cell Mechanical Behavior by Using Mass-Spring Lattices", journal = j-TCBB, volume = "14", number = "6", pages = "1476--1481", month = nov, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2586063", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Various mechanical models of live amnioserosa cells during Drosophila melanogaster's dorsal closure are proposed. Such models account for specific biomechanical oscillating behaviors and depend on a different set of parameters. The identification of the parameters for each of the proposed models is accomplished according to a least-squares approach in such a way to best fit the cellular dynamics extracted from live images. 
For the purpose of comparison, the resulting models after identification are validated to allow for the selection of the most appropriate description of such a cell dynamics. The proposed methodology is general and it may be applied to other planar biological processes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Lovato:2017:SNR, author = "Pietro Lovato and Marco Cristani and Manuele Bicego", title = "Soft {Ngram} Representation and Modeling for Protein Remote Homology Detection", journal = j-TCBB, volume = "14", number = "6", pages = "1482--1488", month = nov, year = "2017", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2595575", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Jan 12 18:05:03 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Remote homology detection represents a central problem in bioinformatics, where the challenge is to detect functionally related proteins when their sequence similarity is low. Recent solutions employ representations derived from the sequence profile, obtained by replacing each amino acid of the sequence by the corresponding most probable amino acid in the profile. However, the information contained in the profile could be exploited more deeply, provided that there is a representation able to capture and properly model such crucial evolutionary information. In this paper, we propose a novel profile-based representation for sequences, called soft Ngram. This representation, which extends the traditional Ngram scheme obtained by grouping N consecutive amino acids, permits considering all of the evolutionary information in the profile: this is achieved by extracting Ngrams from the whole profile, equipping them with a weight directly computed from the corresponding evolutionary frequencies. 
We illustrate two different approaches to model the proposed representation and to derive a feature vector, which can be effectively used for classification using a support vector machine (SVM). A thorough evaluation on three benchmarks demonstrates that the new approach outperforms other Ngram-based methods, and shows very promising results also in comparison with a broader spectrum of techniques.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wang:2018:GMS, author = "Yanbo Wang and Weikang Qian and Bo Yuan", title = "A Graphical Model of Smoking-Induced Global Instability in Lung Cancer", journal = j-TCBB, volume = "15", number = "1", pages = "1--14", month = jan, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2599867", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 13 17:18:15 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Smoking is the major cause of lung cancer and the leading cause of cancer-related death in the world. The most current view about lung cancer is no longer limited to individual genes being mutated by any carcinogenic insults from smoking. Instead, tumorigenesis is a phenotype conferred by many systematic and global alterations, leading to extensive heterogeneity and variation for both the genotypes and phenotypes of individual cancer cells. Thus, strategically it is foremost important to develop a methodology to capture any consistent and global alterations presumably shared by most of the cancerous cells for a given population. This is particularly true that almost all of the data collected from solid cancers including lung cancers are usually distant apart over a large span of temporal or even spatial contexts.
Here, we report a multiple non-Gaussian graphical model to reconstruct the gene interaction network using two previously published gene expression datasets. Our graphical model aims to selectively detect gross structural changes at the level of gene interaction networks. Our methodology is extensively validated, demonstrating good robustness, as well as the selectivity and specificity expected based on our biological insights. In summary, gene regulatory networks are still relatively stable during presumably the early stage of neoplastic transformation. But drastic structural differences can be found between lung cancer and its normal control, including the gain of functional modules for cellular proliferations such as EGFR and PDGFRA, as well as the loss of the important IL6 module, supporting their roles as potential drug targets. Interestingly, our method can also detect early modular changes, with the ALDH3A1 and its associated interactions being strongly implicated as a potential early marker, whose activations appear to alter LCN2 module as well as its interactions with the important TP53-MDM2 circuitry.
Our strategy using the graphical model to reconstruct gene interaction work with biologically-inspired constraints exemplifies the importance and beauty of biology in developing any bio-computational approach.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Jansson:2018:AMR, author = "Jesper Jansson and Ramesh Rajaby and Chuanqi Shen and Wing-Kin Sung", title = "Algorithms for the Majority Rule + Consensus Tree and the Frequency Difference Consensus Tree", journal = j-TCBB, volume = "15", number = "1", pages = "15--26", month = jan, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2609923", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 13 17:18:15 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This article presents two new deterministic algorithms for constructing consensus trees. Given an input of $k$ phylogenetic trees with identical leaf label sets and $n$ leaves each, the first algorithm constructs the majority rule + consensus tree in $ O(k n) $ time, which is optimal since the input size is $ \Omega (k n) $, and the second one constructs the frequency difference consensus tree in $ \min \lbrace O(k n^2), O(k n (k + \log^2 n)) \rbrace $ time.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Vyas:2018:AGP, author = "Renu Vyas and Sanket Bapat and Purva Goel and Muthukumarasamy Karthikeyan and Sanjeev S. Tambe and Bhaskar D.
Kulkarni", title = "Application of Genetic Programming {GP} Formalism for Building Disease Predictive Models from Protein--Protein Interactions {PPI} Data", journal = j-TCBB, volume = "15", number = "1", pages = "27--37", month = jan, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2621042", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 13 17:18:15 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Protein-protein interactions (PPIs) play a vital role in the biological processes involved in the cell functions and disease pathways. The experimental methods known to predict PPIs require tremendous efforts and the results are often hindered by the presence of a large number of false positives. Herein, we demonstrate the use of a new Genetic Programming (GP) based Symbolic Regression (SR) approach for predicting PPIs related to a disease. In this case study, a dataset consisting of 135 PPI complexes related to cancer was used to construct a generic PPI predicting model with good PPI prediction accuracy and generalization ability. A high correlation coefficient (CC) magnitude of 0.893, and low root mean square error (RMSE), and mean absolute percentage error (MAPE) values of 478.221 and 0.239, respectively, were achieved for both the training and test set outputs. To validate the discriminatory nature of the model, it was applied on a dataset of diabetes complexes where it yielded significantly low CC values.
Thus, the GP model developed here serves a dual purpose: (a) a predictor of the binding energy of cancer related PPI complexes, and (b) a classifier for discriminating PPI complexes related to cancer from those of other diseases.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Hu:2018:ARE, author = "Bin Hu and Xiaowei Li and Shuting Sun and Martyn Ratcliffe", title = "Attention Recognition in {EEG}-Based Affective Learning Research Using {CFS + KNN} Algorithm", journal = j-TCBB, volume = "15", number = "1", pages = "38--45", month = jan, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2616395", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 13 17:18:15 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The research detailed in this paper focuses on the processing of Electroencephalography (EEG) data to identify attention during the learning process. The identification of affect using our procedures is integrated into a simulated distance learning system that provides feedback to the user with respect to attention and concentration. The authors propose a classification procedure that combines correlation-based feature selection (CFS) and a k-nearest-neighbor (KNN) data mining algorithm. To evaluate the CFS+KNN algorithm, it was tested against CFS+C4.5 algorithm and other classification algorithms. The classification performance was measured 10 times with different 3-fold cross validation data. The data was derived from 10 subjects while they were attempting to learn material in a simulated distance learning environment. A self-assessment model of self-report was used with a single valence to evaluate attention on 3 levels (high, neutral, low).
It was found that CFS+KNN had a much better performance, giving the highest correct classification rate (CCR) of $ 80.84 \pm 3.0 $ \% for the valence dimension divided into three classes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ushakov:2018:BLB, author = "Anton V. Ushakov and Xenia Klimentova and Igor Vasilyev", title = "Bi-level and Bi-objective $p$-Median Type Problems for Integrative Clustering: Application to Analysis of Cancer Gene-Expression and Drug-Response Data", journal = j-TCBB, volume = "15", number = "1", pages = "46--59", month = jan, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2622692", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 13 17:18:15 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Recent advances in high-throughput technologies have given rise to collecting large amounts of multidimensional heterogeneous data that provide diverse information on the same biological samples. Integrative analysis of such multisource datasets may reveal new biological insights into complex biological mechanisms and therefore remains an important research field in systems biology. Most of the modern integrative clustering approaches rely on independent analysis of each dataset and consensus clustering, probabilistic or statistical modeling, while flexible distance-based integrative clustering techniques are sparsely covered. We propose two distance-based integrative clustering frameworks based on bi-level and bi-objective extensions of the $p$-median problem. A hybrid branch-and-cut method is developed to find global optimal solutions to the bi-level $p$-median model. As to the bi-objective problem, an $ \varepsilon $-constraint algorithm is proposed to generate an approximation to the Pareto optimal set.
Every solution found by any of the frameworks corresponds to an integrative clustering. We present an application of our approaches to integrative analysis of NCI-60 human tumor cell lines characterized by gene expression and drug activity profiles. We demonstrate that the proposed mathematical optimization-based approaches outperform some state-of-the-art and traditional distance-based integrative and non-integrative clustering techniques.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Viswanath:2018:CET, author = "Narayanan C. Viswanath", title = "Calculating the Expected Time to Eradicate {HIV-1} Using a {Markov} Chain", journal = j-TCBB, volume = "15", number = "1", pages = "60--67", month = jan, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2619342", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 13 17:18:15 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In this study, the expected time required to eradicate HIV-1 completely was found as the conditional absorbing time in a finite state space continuous-time Markov chain model. The Markov chain has two absorbing states: one corresponds to HIV eradication and another representing the possible disaster. This method allowed us to calculate the expected eradication time by solving systems of linear equations. To overcome the challenge of huge dimension of the problem, we applied a novel stop and resume technique. This technique also helped to stop the numerical computation whenever we wanted and continue later from that point until the final result was obtained. Our numerical study showed the dependence of the expected eradication time of HIV on the half-life of the latently infected cells and there agreed with the previous studies. 
The study predicted that when the half-life of the latent cells varied from 4.6 to 60 months, it took a mean 4.97 to 31.04 years with a corresponding standard deviation of 0.64 to 3.99 years to eradicate the latent cell reservoir. It also revealed the crucial dependence of eradication time on the initial number of latently infected cells.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Karbalayghareh:2018:CST, author = "Alireza Karbalayghareh and Ulisses Braga-Neto and Jianping Hua and Edward Russell Dougherty", title = "Classification of State Trajectories in Gene Regulatory Networks", journal = j-TCBB, volume = "15", number = "1", pages = "68--82", month = jan, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2616470", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 13 17:18:15 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Gene-expression-based phenotype classification is used for disease diagnosis and prognosis relating to treatment strategies. The present paper considers classification based on sequential measurements of multiple genes using gene regulatory network GRN modeling. There are two networks, original and mutated, and observations consist of trajectories of network states. The problem is to classify an observation trajectory as coming from either the original or mutated network. GRNs are modeled via probabilistic Boolean networks, which incorporate stochasticity at both the gene and network levels. Mutation affects the regulatory logic. Classification is based upon observing a trajectory of states of some given length. We characterize the Bayes classifier and find the Bayes error for a general PBN and the special case of a single Boolean network affected by random perturbations BNp. 
The Bayes error is related to network sensitivity, meaning the extent of alteration in the steady-state distribution of the original network owing to mutation. Using standard methods to calculate steady-state distributions is cumbersome and sometimes impossible, so we provide an efficient algorithm and approximations. Extensive simulations are performed to study the effects of various factors, including approximation accuracy. We apply the classification procedure to a p53 BNp and a mammalian cell cycle PBN.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Hartmann:2018:CTD, author = "Tom Hartmann and An-Chiang Chu and Martin Middendorf and Matthias Bernt", title = "Combinatorics of Tandem Duplication Random Loss Mutations on Circular Genomes", journal = j-TCBB, volume = "15", number = "1", pages = "83--95", month = jan, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2613522", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 13 17:18:15 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The tandem duplication random loss operation TDRL is an important genome rearrangement operation in metazoan mitochondrial genomes. A TDRL consists of a duplication of a contiguous set of genes in tandem followed by a random loss of one copy of each duplicated gene. This paper presents an analysis of the combinatorics of TDRLs on circular genomes, e.g., the mitochondrial genome. In particular, results on TDRLs for circular genomes and their linear representatives are established. Moreover, the distance between gene orders with respect to linear TDRLs and circular TDRLs is studied. 
An analysis of the available animal mitochondrial gene orders shows the practical relevance of the theoretical results.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Hujdurovic:2018:CAF, author = "Ademir Hujdurovi{\'c} and Ur{\v{s}}a Ka{\v{c}}ar and Martin Milani{\v{c}} and Bernard Ries and Alexandru I. Tomescu", title = "Complexity and Algorithms for Finding a Perfect Phylogeny from Mixed Tumor Samples", journal = j-TCBB, volume = "15", number = "1", pages = "96--108", month = jan, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2606620", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 13 17:18:15 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Hajirasouliha and Raphael (WABI 2014) proposed a model for deconvoluting mixed tumor samples measured from a collection of high-throughput sequencing reads. This is related to understanding tumor evolution and critical cancer mutations. In short, their formulation asks to split each row of a binary matrix so that the resulting matrix corresponds to a perfect phylogeny and has the minimum number of rows among all matrices with this property. In this paper, we disprove several claims about this problem, including an NP-hardness proof of it. However, we show that the problem is indeed NP-hard, by providing a different proof. We also prove NP-completeness of a variant of this problem proposed in the same paper. On the positive side, we propose an efficient though not necessarily optimal heuristic algorithm based on coloring co-comparability graphs, and a polynomial time algorithm for solving the problem optimally on matrix instances in which no column is contained in both columns of a pair of conflicting columns.
Implementations of these algorithms are freely available at https://github.com/alexandrutomescu/MixedPerfectPhylogeny.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhang:2018:DEP, author = "Wei Zhang and Jia Xu and Yuanyuan Li and Xiufen Zou", title = "Detecting Essential Proteins Based on Network Topology, Gene Expression Data, and Gene Ontology Information", journal = j-TCBB, volume = "15", number = "1", pages = "109--116", month = jan, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2615931", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 13 17:18:15 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", note = "See correction \cite{Zhang:2018:CDE}.", abstract = "The identification of essential proteins in protein-protein interaction (PPI) networks is of great significance for understanding cellular processes. With the increasing availability of large-scale PPI data, numerous centrality measures based on network topology have been proposed to detect essential proteins from PPI networks. However, most of the current approaches focus mainly on the topological structure of PPI networks, and largely ignore the gene ontology annotation information. In this paper, we propose a novel centrality measure, called TEO, for identifying essential proteins by combining network topology, gene expression profiles, and GO information. To evaluate the performance of the TEO method, we compare it with five other methods (degree, betweenness, NC, Pec, and CowEWC) in detecting essential proteins from two different yeast PPI datasets.
The simulation results show that adding GO information can effectively improve the predicted precision and that our method outperforms the others in predicting essential proteins.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Pizzi:2018:EAS, author = "Cinzia Pizzi and Mattia Ornamenti and Simone Spangaro and Simona E. Rombo and Laxmi Parida", title = "Efficient Algorithms for Sequence Analysis with Entropic Profiles", journal = j-TCBB, volume = "15", number = "1", pages = "117--128", month = jan, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2620143", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 13 17:18:15 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Entropy, being closely related to repetitiveness and compressibility, is a widely used information-related measure to assess the degree of predictability of a sequence. Entropic profiles are based on information theory principles, and can be used to study the under-/over-representation of subwords, by also providing information about the scale of conserved DNA regions. Here, we focus on the algorithmic aspects related to entropic profiles. In particular, we propose linear time algorithms for their computation that rely on suffix-based data structures, more specifically on the truncated suffix tree TST and on the enhanced suffix array ESA. 
We performed an extensive experimental campaign showing that our algorithms, beside being faster, make it possible the analysis of longer sequences, even for high degrees of resolution, than state of the art algorithms.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Kim:2018:GNA, author = "Hyunjin Kim and Sang-Min Choi and Sanghyun Park", title = "{GSEH}: a Novel Approach to Select Prostate Cancer-Associated Genes Using Gene Expression Heterogeneity", journal = j-TCBB, volume = "15", number = "1", pages = "129--146", month = jan, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2618927", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 13 17:18:15 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "When a gene shows varying levels of expression among normal people but similar levels in disease patients or shows similar levels of expression among normal people but different levels in disease patients, we can assume that the gene is associated with the disease. By utilizing this gene expression heterogeneity, we can obtain additional information that abets discovery of disease-associated genes. In this study, we used collaborative filtering to calculate the degree of gene expression heterogeneity between classes and then scored the genes on the basis of the degree of gene expression heterogeneity to find ``differentially predicted'' genes. Through the proposed method, we discovered more prostate cancer-associated genes than 10 comparable methods. 
The genes prioritized by the proposed method are potentially significant to biological processes of a disease and can provide insight into them.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhang:2018:HSB, author = "Jian Zhang and Haiting Chai and Bo Gao and Guifu Yang and Zhiqiang Ma", title = "{HEMEsPred}: Structure-Based Ligand-Specific Heme Binding Residues Prediction by Using Fast-Adaptive Ensemble Learning Scheme", journal = j-TCBB, volume = "15", number = "1", pages = "147--156", month = jan, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2615010", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 13 17:18:15 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Heme is an essential biomolecule that widely exists in numerous extant organisms. Accurately identifying heme binding residues HEMEs is of great importance in disease progression and drug development. In this study, a novel predictor named HEMEsPred was proposed for predicting HEMEs. First, several sequence- and structure-based features, including amino acid composition, motifs, surface preferences, and secondary structure, were collected to construct feature matrices. Second, a novel fast-adaptive ensemble learning scheme was designed to overcome the serious class-imbalance problem as well as to enhance the prediction performance. Third, we further developed ligand-specific models considering that different heme ligands varied significantly in their roles, sizes, and distributions. Statistical test proved the effectiveness of ligand-specific models. Experimental results on benchmark datasets demonstrated good robustness of our proposed method. 
Furthermore, our method also showed good generalization capability and outperformed many state-of-art predictors on two independent testing datasets. HEMEsPred web server was available at http://www.inforstation.com/HEMEsPred/ for free academic use.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Taha:2018:IFP, author = "Kamal Taha", title = "Inferring the Functions of Proteins from the Interrelationships between Functional Categories", journal = j-TCBB, volume = "15", number = "1", pages = "157--167", month = jan, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2615608", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 13 17:18:15 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This study proposes a new method to determine the functions of an unannotated protein. The proteins and amino acid residues mentioned in biomedical texts associated with an unannotated protein $p$ can be considered as characteristics terms for $p$, which are highly predictive of the potential functions of $p$. Similarly, proteins and amino acid residues mentioned in biomedical texts associated with proteins annotated with a functional category $f$ can be considered as characteristics terms of $f$. We introduce in this paper an information extraction system called IFP\_IFC that predicts the functions of an unannotated protein $p$ by representing $p$ and each functional category $f$ by a vector of weights. Each weight reflects the degree of association between a characteristic term and $p$ or a characteristic term and $f$. First, IFP\_IFC constructs a network, whose nodes represent the different functional categories, and its edges the interrelationships between the nodes.
Then, it determines the functions of $p$ by employing random walks with restarts on the mentioned network. The walker is the vector of $p$. Finally, $p$ is assigned to the functional categories of the nodes in the network that are visited most by the walker. We evaluated the quality of IFP\_IFC by comparing it experimentally with two other systems. Results showed marked improvement.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Leale:2018:IUB, author = "Guillermo Leale and Ariel Emilio Baya and Diego H. Milone and Pablo M. Granitto and Georgina Stegmayer", title = "Inferring Unknown Biological Function by Integration of {GO} Annotations and Gene Expression Data", journal = j-TCBB, volume = "15", number = "1", pages = "168--180", month = jan, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2615960", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 13 17:18:15 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Characterizing genes with semantic information is an important process regarding the description of gene products. In spite that complete genomes of many organisms have been already sequenced, the biological functions of all of their genes are still unknown. Since experimentally studying the functions of those genes, one by one, would be unfeasible, new computational methods for gene functions inference are needed. We present here a novel computational approach for inferring biological function for a set of genes with previously unknown function, given a set of genes with well-known information. This approach is based on the premise that genes with similar behaviour should be grouped together. This is known as the guilt-by-association principle.
Thus, it is possible to take advantage of clustering techniques to obtain groups of unknown genes that are co-clustered with genes that have well-known semantic information GO annotations. Meaningful knowledge to infer unknown semantic information can therefore be provided by these well-known genes. We provide a method to explore the potential function of new genes according to those currently annotated. The results obtained indicate that the proposed approach could be a useful and effective tool when used by biologists to guide the inference of biological functions for recently discovered genes. Our work sets an important landmark in the field of identifying unknown gene functions through clustering, using an external source of biological input. A simple web interface to this proposal can be found at http://fich.unl.edu.ar/sinc/webdemo/gamma-am/.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Chlis:2018:ISB, author = "Nikolaos-Kosmas Chlis and Ekaterini S. Bei and Michalis Zervakis", title = "Introducing a Stable Bootstrap Validation Framework for Reliable Genomic Signature Extraction", journal = j-TCBB, volume = "15", number = "1", pages = "181--190", month = jan, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2633267", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 13 17:18:15 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The application of machine learning methods for the identification of candidate genes responsible for phenotypes of interest, such as cancer, is a major challenge in the field of bioinformatics. These lists of genes are often called genomic signatures and their linkage to phenotype associations may form a significant step in discovering the causation between genotypes and phenotypes. 
Traditional methods that produce genomic signatures from DNA Microarray data tend to extract significantly different lists under relatively small variations of the training data. That instability hinders the validity of research findings and raises skepticism about the reliability of such methods. In this study, a complete framework for the extraction of stable and reliable lists of candidate genes is presented. The proposed methodology enforces stability of results at the validation step and as a result, it is independent of the feature selection and classification methods used. Furthermore, two different statistical tests are performed in order to assess the statistical significance of the observed results. Moreover, the consistency of the signatures extracted by independent executions of the proposed method is also evaluated. The results of this study highlight the importance of stability issues in genomic signatures, beyond their prediction capabilities.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ma:2018:MLR, author = "Eddie Y. T. Ma and Sujeevan Ratnasingham and Stefan C. Kremer", title = "Machine Learned Replacement of {$N$}-Labels for Basecalled Sequences in {DNA} Barcoding", journal = j-TCBB, volume = "15", number = "1", pages = "191--204", month = jan, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2598752", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 13 17:18:15 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This study presents a machine learning method that increases the number of identified bases in Sanger Sequencing. The system post-processes a KB basecalled chromatogram. It selects a recoverable subset of N-labels in the KB-called chromatogram to replace with basecalls A,C,G,T. 
An N-label correction is defined given an additional read of the same sequence, and a human finished sequence. Corrections are added to the dataset when an alignment determines the additional read and human agree on the identity of the N-label. KB must also rate the replacement with quality value of $ > 60 $ in the additional read. Corrections are only available during system training. Developing the system, nearly 850,000 N-labels are obtained from Barcode of Life Datasystems, the premier database of genetic markers called DNA Barcodes. Increasing the number of correct bases improves reference sequence reliability, increases sequence identification accuracy, and assures analysis correctness. Keeping with barcoding standards, our system maintains an error rate of $ < 1 $ percent. Our system only applies corrections when it estimates low rate of error. Tested on this data, our automation selects and recovers: 79 percent of N-labels from COI animal barcode; 80 percent from matK and rbcL plant barcodes; and 58 percent from non-protein-coding sequences across eukaryotes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Jetten:2018:NTB, author = "Laura Jetten and Leo van Iersel", title = "Nonbinary Tree-Based Phylogenetic Networks", journal = j-TCBB, volume = "15", number = "1", pages = "205--217", month = jan, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2615918", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 13 17:18:15 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Rooted phylogenetic networks are used to describe evolutionary histories that contain non-treelike evolutionary events such as hybridization and horizontal gene transfer. 
In some cases, such histories can be described by a phylogenetic base-tree with additional linking arcs, which can, for example, represent gene transfer events. Such phylogenetic networks are called tree-based. Here, we consider two possible generalizations of this concept to nonbinary networks, which we call tree-based and strictly-tree-based nonbinary phylogenetic networks. We give simple graph-theoretic characterizations of tree-based and strictly-tree-based nonbinary phylogenetic networks. Moreover, we show for each of these two classes that it can be decided in polynomial time whether a given network is contained in the class. Our approach also provides a new view on tree-based binary phylogenetic networks. Finally, we discuss two examples of nonbinary phylogenetic networks in biology and show how our results can be applied to them.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Mohsenizadeh:2018:OOB, author = "Daniel N. Mohsenizadeh and Roozbeh Dehghannasiri and Edward R. Dougherty", title = "Optimal Objective-Based Experimental Design for Uncertain Dynamical Gene Networks with Experimental Error", journal = j-TCBB, volume = "15", number = "1", pages = "218--230", month = jan, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2602873", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 13 17:18:15 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In systems biology, network models are often used to study interactions among cellular components, a salient aim being to develop drugs and therapeutic mechanisms to change the dynamical behavior of the network to avoid undesirable phenotypes. 
Owing to limited knowledge, model uncertainty is commonplace and network dynamics can be updated in different ways, thereby giving multiple dynamic trajectories, that is, dynamics uncertainty. In this manuscript, we propose an experimental design method that can effectively reduce the dynamics uncertainty and improve performance in an interaction-based network. Both dynamics uncertainty and experimental error are quantified with respect to the modeling objective, herein, therapeutic intervention. The aim of experimental design is to select among a set of candidate experiments the experiment whose outcome, when applied to the network model, maximally reduces the dynamics uncertainty pertinent to the intervention objective.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Behinaein:2018:PNS, author = "Behnam Behinaein and Karen Rudie and Waheed Sangrar", title = "{Petri} Net Siphon Analysis and Graph Theoretic Measures for Identifying Combination Therapies in Cancer", journal = j-TCBB, volume = "15", number = "1", pages = "231--243", month = jan, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2614301", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 13 17:18:15 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Epidermal Growth Factor Receptor EGFR signaling to the Ras-MAPK pathway is implicated in the development and progression of cancer and is a major focus of targeted combination therapies. Physiochemical models have been used for identifying and testing the signal-inhibiting potential of targeted therapies; however, their application to larger multi-pathway networks is limited by the availability of experimentally-determined rate and concentration parameters. 
An alternate strategy for identifying and evaluating drug-targetable nodes is proposed. A physiochemical model of EGFR-Ras-MAPK signaling is implemented and calibrated to experimental data. Essential topological features of the model are converted into a Petri net and nodes that behave as siphons---a structural property of Petri nets---are identified. Siphons represent potential drug-targets since they are unrecoverable if their values fall below a threshold. Centrality measures are then used to prioritize siphons identified as candidate drug-targets. Single and multiple drug-target combinations are identified which correspond to clinically relevant drug targets and exhibit inhibition synergy in physiochemical simulations of EGF-induced EGFR-Ras-MAPK signaling. Taken together, these studies suggest that siphons and centrality analyses are a promising computational strategy to identify and rank drug-targetable nodes in larger networks as they do not require knowledge of the dynamics of the system, but rely solely on topology.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Khan:2018:RPR, author = "Shujaat Khan and Imran Naseem and Roberto Togneri and Mohammed Bennamoun", title = "{RAFP-Pred}: Robust Prediction of Antifreeze Proteins Using Localized Analysis of $n$-Peptide Compositions", journal = j-TCBB, volume = "15", number = "1", pages = "244--250", month = jan, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2617337", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 13 17:18:15 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In extreme cold weather, living organisms produce Antifreeze Proteins AFPs to counter the otherwise lethal intracellular formation of ice.
Structures and sequences of various AFPs exhibit a high degree of heterogeneity, consequently the prediction of the AFPs is considered to be a challenging task. In this research, we propose to handle this arduous manifold learning task using the notion of localized processing. In particular, an AFP sequence is segmented into two sub-segments each of which is analyzed for amino acid and di-peptide compositions. We propose to use only the most significant features using the concept of information gain IG followed by a random forest classification approach. The proposed RAFP-Pred achieved an excellent performance on a number of standard datasets. We report a high Youden's index (sensitivity + specificity $-$ 1) value of 0.75 on the standard independent test data set outperforming the AFP-PseAAC, AFP\_PSSM, AFP-Pred, and iAFP by a margin of 0.05, 0.06, 0.14, and 0.68, respectively. The verification rate on the UniProKB dataset is found to be 83.19 percent which is substantially superior to the 57.18 percent reported for the iAFP method.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Czapla:2018:RSS, author = "Roman Czapla", title = "Random Sets of Stadiums in Square and Collective Behavior of Bacteria", journal = j-TCBB, volume = "15", number = "1", pages = "251--256", month = jan, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2611676", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 13 17:18:15 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Collective motion of swimmers can be detected by hydrodynamic interactions through the effective macroscopic viscosity.
It follows from the general hydrodynamics that the effective viscosity of non-dilute random suspensions depends on the shape of particles and of their spacial probabilistic distribution. Therefore, a comparative analysis of disordered and collectively interacting particles of the bacteria shape can be done in terms of the probabilistic geometric parameters which determine the effective viscosity. In this paper, we develop a quantitative criterion to detect the collective behavior of bacteria. This criterion is based on the basic statistic moments $e$-sums or generalized Eisenstein-Rayleigh sums which characterize the high-order correlation functions. The locations and the shape of bacteria are modeled by stadiums randomly embedded in medium without overlapping. These shape models can be considered as improvement of the previous segment model. We calculate the $e$-sums of the simulated disordered sets and of the observed experimental locations of bacteria subtilis. The obtained results show a difference between these two sets that demonstrates the collective motion of bacteria.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Borges:2018:RGS, author = "Vinicius R. P. Borges and Maria Cristina F. de Oliveira and Thais Garcia Silva and Armando Augusto Henriques Vieira and Bernd Hamann", title = "Region Growing for Segmenting Green Microalgae Images", journal = j-TCBB, volume = "15", number = "1", pages = "257--270", month = jan, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2615606", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 13 17:18:15 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We describe a specialized methodology for segmenting 2D microscopy digital images of freshwater green microalgae. 
The goal is to obtain representative algae shapes to extract morphological features to be employed in a posterior step of taxonomical classification of the species. The proposed methodology relies on the seeded region growing principle and on a fine-tuned filtering preprocessing stage to smooth the input image. A contrast enhancement process then takes place to highlight algae regions on a binary pre-segmentation image. This binary image is also employed to determine where to place the seed points and to estimate the statistical probability distributions that characterize the target regions, i.e., the algae areas and the background, respectively. These preliminary stages produce the required information to set the homogeneity criterion for region growing. We evaluate the proposed methodology by comparing its resulting segmentations with a set of corresponding ground-truth segmentations provided by an expert biologist and also with segmentations obtained with existing strategies. The experimental results show that our solution achieves highly accurate segmentation rates with greater efficiency, as compared with the performance of standard segmentation approaches and with an alternative previous solution, based on level-sets, also specialized to handle this particular problem.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ali:2018:SDS, author = "M. Syed Ali and N. 
Gunasekaran and Choon Ki Ahn and Peng Shi", title = "Sampled-Data Stabilization for Fuzzy Genetic Regulatory Networks with Leakage Delays", journal = j-TCBB, volume = "15", number = "1", pages = "271--285", month = jan, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2606477", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 13 17:18:15 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This paper deals with the sampled-data stabilization problem for Takagi-Sugeno (T-S) fuzzy genetic regulatory networks with leakage delays. A novel Lyapunov-Krasovskii functional (LKF) is established by the non-uniform division of the delay intervals with triplex and quadruplex integral terms. Using such LKFs for constant and time-varying delay cases, new stability conditions are obtained in the T-S fuzzy framework. Based on this, a new condition for the sampled-data controller design is proposed using a linear matrix inequality representation. A numerical result is provided to show the effectiveness and potential of the developed design method.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Suryanto:2018:SCC, author = "Chendra Hadi Suryanto and Hiroto Saigo and Kazuhiro Fukui", title = "Structural Class Classification of {$3$D} Protein Structure Based on Multi-View {$2$D} Images", journal = j-TCBB, volume = "15", number = "1", pages = "286--299", month = jan, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2603987", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 13 17:18:15 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Computing similarity or dissimilarity between protein structures is an important task in structural biology.
A conventional method to compute protein structure dissimilarity requires structural alignment of the proteins. However, defining one best alignment is difficult, especially when the structures are very different. In this paper, we propose a new similarity measure for protein structure comparisons using a set of multi-view 2D images of 3D protein structures. In this approach, each protein structure is represented by a subspace from the image set. The similarity between two protein structures is then characterized by the canonical angles between the two subspaces. The primary advantage of our method is that precise alignment is not needed. We employed Grassmann Discriminant Analysis GDA as the subspace-based learning in the classification framework. We applied our method for the classification problem of seven SCOP structural classes of protein 3D structures. The proposed method outperformed the k-nearest neighbor method k-NN based on conventional alignment-based methods CE, FATCAT, and TM-align. Our method was also applied to the classification of SCOP folds of membrane proteins, where the proposed method could recognize the fold HEM-binding four-helical bundle f.21 much better than TM-Align.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Anders:2018:IPR, author = "Gerd Anders and Ulrich Hassiepen and Stephan Theisgen and Stephan Heymann and Lionel Muller and Tania Panigada and Daniel Huster and Sergey A. 
Samsonov", title = "The Intrinsic Pepsin Resistance of Interleukin-8 Can Be Explained from a Combined Bioinformatical and Experimental Approach", journal = j-TCBB, volume = "15", number = "1", pages = "300--308", month = jan, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2614821", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 13 17:18:15 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Interleukin-8 (IL-8, CXCL8) is a neutrophil chemotactic factor belonging to the family of chemokines. IL-8 was shown to resist pepsin cleavage displaying its high resistance to this protease. However, the molecular mechanisms underlying this resistance are not fully understood. Using our in-house database containing the data on three-dimensional arrangements of secondary structure elements from the whole Protein Data Bank, we found a striking structural similarity between IL-8 and pepsin inhibitor-3. Such similarity could play a key role in understanding IL-8 resistance to the protease pepsin. To support this hypothesis, we applied pepsin assays confirming that intact IL-8 is not degraded by pepsin in comparison to IL-8 in a denaturated state. Applying 1H-15N Heteronuclear Single Quantum Coherence NMR measurements, we determined the putative regions at IL-8 that are potentially responsible for interactions with the pepsin.
The results obtained in this work contribute to the understanding of the resistance of IL-8 to pepsin proteolysis in terms of its structural properties.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Luo:2018:NAI, author = "Jiawei Luo and Wei Huang and Buwen Cao", title = "A Novel Approach to Identify the {miRNA--mRNA} Causal Regulatory Modules in Cancer", journal = j-TCBB, volume = "15", number = "1", pages = "309--315", month = jan, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2612199", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 13 17:18:15 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "MicroRNAs miRNAs play an essential role in many biological processes by regulating the target genes, especially in the initiation and development of cancers. Therefore, the identification of the miRNA-mRNA regulatory modules is important for understanding the regulatory mechanisms. Most computational methods only used statistical correlations in predicting miRNA-mRNA modules, and neglected the fact there are causal relationships between miRNAs and their target genes. In this paper, we propose a novel approach called CALM the causal regulatory modules to identify the miRNA-mRNA regulatory modules through integrating the causal interactions and statistical correlations between the miRNAs and their target genes. Our algorithm largely consists of three steps: it first forms the causal regulatory relationships of miRNAs and genes from gene expression profiles and detects the miRNA clusters according to the GO function information of their target genes, then expands each miRNA cluster by greedy adding discarding the target genes to maximize the modularity score. 
To show the performance of our method, we apply CALM on four datasets including EMT, breast, ovarian, and thyroid cancer and validate our results. The experiment results show that our method can not only outperform the compared method, but also achieve ideal overall performance in terms of the functional enrichment.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Bao:2018:AAC, author = "Feng Bao and Yue Deng and Qionghai Dai", title = "{ACID}: Association Correction for Imbalanced Data in {GWAS}", journal = j-TCBB, volume = "15", number = "1", pages = "316--322", month = jan, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2608819", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 13 17:18:15 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Genome-wide association study GWAS has been widely witnessed as a powerful tool for revealing suspicious loci from various diseases. However, real world GWAS tasks always suffer from the data imbalance problem of sufficient control samples and limited case samples. This imbalance issue can cause serious biases to the result and thus leads to losses of significance for true causal markers. To tackle this problem, we proposed a computational framework to perform association correction for imbalanced data ACID that could potentially improve the performance of GWAS under the imbalance condition. ACID is inspired by the imbalance learning theory but is particularly modified to address the task of association discovery from sequential genomic data. Simulation studies demonstrate ACID can dramatically improve the power of traditional GWAS method on the dataset with severe imbalances. 
We further applied ACID to two imbalanced datasets (gastric cancer and bladder cancer) to conduct genome wide association analysis. Experimental results indicate that our method has better abilities in identifying suspicious loci than the regression approach and shows consistencies with existing discoveries.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Jiao:2018:FDA, author = "Hongmei Jiao and Michael Shi and Qikun Shen and Junwu Zhu and Peng Shi", title = "Filter Design with Adaptation to Time-Delay Parameters for Genetic Regulatory Networks", journal = j-TCBB, volume = "15", number = "1", pages = "323--329", month = jan, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2606430", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 13 17:18:15 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In existing works, the filters designed for delayed genetic regulatory networks (GRNs) contain time delay. If the time delay is unknown, the filters do not work in practical applications. In order to overcome the shortcoming in such existing works, this paper studies the filter design problem of GRNs with unknown constant time delay, and a novel adaptive filter is introduced, in which all unknown network parameters and the unknown time delay can be estimated online. By Lyapunov approach, it is shown that the estimating errors asymptotically converge to the origin.
Finally, simulation results are presented to illustrate the effectiveness of the new method proposed in this paper.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Elmsallati:2018:IBN, author = "Ahed Elmsallati and Abdulghani Msalati and Jugal Kalita", title = "Index-Based Network Aligner of Protein--Protein Interaction Networks", journal = j-TCBB, volume = "15", number = "1", pages = "330--336", month = jan, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2613098", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 13 17:18:15 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Network Alignment over graph-structured data has received considerable attention in many recent applications. Global network alignment tries to uniquely find the best mapping for a node in one network to only one node in another network. The mapping is performed according to some matching criteria that depend on the nature of data. In molecular biology, functional orthologs, protein complexes, and evolutionary conserved pathways are some examples of information uncovered by global network alignment. Current techniques for global network alignment suffer from several drawbacks, e.g., poor performance and high memory requirements. We address these problems by proposing IBNAL, Indexes-Based Network ALigner, for better alignment quality and faster results. To accelerate the alignment step, IBNAL makes use of a novel clique-based index and is able to align large networks in seconds. IBNAL produces a higher topological quality alignment and comparable biological match in alignment relative to other state-of-the-art aligners even though topological fit is primarily used to match nodes. 
IBNAL's results confirm and give another evidence that homology information is more likely to be encoded in network topology than sequence information.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Allman:2018:STI, author = "Elizabeth S. Allman and James H. Degnan and John A. Rhodes", title = "Species Tree Inference from Gene Splits by Unrooted {STAR} Methods", journal = j-TCBB, volume = "15", number = "1", pages = "337--342", month = jan, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2604812", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 13 17:18:15 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The $ \text {NJ}_{st} $ method was proposed by Liu and Yu to infer a species tree topology from unrooted topological gene trees. While its statistical consistency under the multispecies coalescent model was established only for a four-taxon tree, simulations demonstrated its good performance on gene trees inferred from sequences for many taxa. Here, we prove the statistical consistency of the method for an arbitrarily large species tree. Our approach connects $ \text {NJ}_{st} $ to a generalization of the STAR method of Liu, Pearl, and Edwards, and a previous theoretical analysis of it. We further show $ \text {NJ}_{st} $ utilizes only the distribution of splits in the gene trees, and not their individual topologies. Finally, we discuss how multiple samples per taxon per gene should be handled for statistical consistency.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Matsen:2018:TRT, author = "Frederick A. Matsen and Sara C. 
Billey and Arnold Kas and Matjaz Konvalinka", title = "Tanglegrams: a Reduction Tool for Mathematical Phylogenetics", journal = j-TCBB, volume = "15", number = "1", pages = "343--349", month = jan, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2613040", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 13 17:18:15 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Many discrete mathematics problems in phylogenetics are defined in terms of the relative labeling of pairs of leaf-labeled trees. These relative labelings are naturally formalized as tanglegrams, which have previously been an object of study in coevolutionary analysis. Although there has been considerable work on planar drawings of tanglegrams, they have not been fully explored as combinatorial objects until recently. In this paper, we describe how many discrete mathematical questions on trees ``factor'' through a problem on tanglegrams, and how understanding that factoring can simplify analysis. Depending on the problem, it may be useful to consider an unordered version of tanglegrams, and/or their unrooted counterparts. For all of these definitions, we show how the isomorphism types of tanglegrams can be understood in terms of double cosets of the symmetric group, and we investigate their automorphisms.
Understanding tanglegrams better will isolate the distinct problems on leaf-labeled pairs of trees and reveal natural symmetries of spaces associated with such problems.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{daSilvaArruda:2018:GBH, author = "Thiago {da Silva Arruda} and Ulisses Dias and Zanoni Dias", title = "A {GRASP}-Based Heuristic for the Sorting by Length-Weighted Inversions Problem", journal = j-TCBB, volume = "15", number = "2", pages = "352--363", month = mar, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2474400", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Apr 7 18:55:55 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Genome Rearrangements are large-scale mutational events that affect genomes during the evolutionary process. Therefore, these mutations differ from punctual mutations. They can move genes from one place to the other, change the orientation of some genes, or even change the number of chromosomes. In this work, we deal with inversion events which occur when a segment of DNA sequence in the genome is reversed. In our model, each inversion costs the number of elements in the reversed segment. We present a new algorithm for this problem based on the metaheuristic called Greedy Randomized Adaptive Search Procedure GRASP that has been routinely used to find solutions for combinatorial optimization problems. In essence, we implemented an iterative process in which each iteration receives a feasible solution whose neighborhood is investigated. Our analysis shows that we outperform any other approach by significant margin. 
We also use our algorithm to build phylogenetic trees for a subset of species in the Yersinia genus and we compared our trees to other results in the literature.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Lin:2018:MGD, author = "Congping Lin and Laurent Lemarchand and Reinhardt Euler and Imogen Sparkes", title = "Modeling the Geometry and Dynamics of the Endoplasmic Reticulum Network", journal = j-TCBB, volume = "15", number = "2", pages = "377--386", month = mar, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2389226", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Apr 7 18:55:55 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The endoplasmic reticulum ER is an intricate network that pervades the entire cortex of plant cells and its geometric shape undergoes drastic changes. This paper proposes a mathematical model to reconstruct geometric network dynamics by combining the node movements within the network and topological changes engendered by these nodes. The network topology in the model is determined by a modified optimization procedure from the work Lemarchand, et al. 2014 which minimizes the total length taking into account both degree and angle constraints, beyond the conditions of connectedness and planarity. A novel feature for solving our optimization problem is the use of ``lifted'' angle constraints, which allows one to considerably reduce the solution runtimes. Using this optimization technique and a Langevin approach for the branching node movement, the simulated network dynamics represent the ER network dynamics observed under latrunculin B treated condition and recaptures features such as the appearance/disappearance of loops within the ER under the native condition. 
The proposed modeling approach allows quantitative comparison of networks between the model and experimental data based on topological changes induced by node dynamics. An increased temporal resolution of experimental data will allow a more detailed comparison of network dynamics using this modeling approach.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ghaffari:2018:ENT, author = "Noushin Ghaffari and Osama A. Arshad and Hyundoo Jeong and John Thiltges and Michael F. Criscitiello and Byung-Jun Yoon and Aniruddha Datta and Charles D. Johnson", title = "Examining {De Novo} Transcriptome Assemblies via a Quality Assessment Pipeline", journal = j-TCBB, volume = "15", number = "2", pages = "494--505", month = mar, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2446478", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Apr 7 18:55:55 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "New de novo transcriptome assembly and annotation methods provide an incredible opportunity to study the transcriptome of organisms that lack an assembled and annotated genome. There are currently a number of de novo transcriptome assembly methods, but it has been difficult to evaluate the quality of these assemblies. In order to assess the quality of the transcriptome assemblies, we composed a workflow of multiple quality check measurements that in combination provide a clear evaluation of the assembly performance. We presented novel transcriptome assemblies and functional annotations for Pacific Whiteleg Shrimp (Litopenaeus vannamei), a mariculture species with great national and international interest, and no solid transcriptome/genome reference.
We examined Pacific Whiteleg transcriptome assemblies via multiple metrics, and provide an improved gene annotation. Our investigations show that assessing the quality of an assembly purely based on the assembler's statistical measurements can be misleading; we propose a hybrid approach that consists of statistical quality checks and further biological-based evaluations.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Kim:2018:CPP, author = "Mansuck Kim and Huan Zhang and Charles Woloshuk and Won-Bo Shim and Byung-Jun Yoon", title = "Computational Prediction of Pathogenic Network Modules in {Fusarium verticillioides}", journal = j-TCBB, volume = "15", number = "2", pages = "506--515", month = mar, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2440232", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Apr 7 18:55:55 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Fusarium verticillioides is a fungal pathogen that triggers stalk rots and ear rots in maize. In this study, we performed a comparative analysis of wild type and loss-of-virulence mutant F. verticillioides co-expression networks to identify subnetwork modules that are associated with its pathogenicity. We constructed the F. verticillioides co-expression networks from RNA-Seq data and searched through these networks to identify subnetwork modules that are differentially activated between the wild type and mutant F. verticillioides, which considerably differ in terms of pathogenic potentials. A greedy seed-and-extend approach was utilized in our search, where we also used an efficient branch-out technique for reliable prediction of functional subnetwork modules in the fungus.
Through our analysis, we identified four potential pathogenicity-associated subnetwork modules, each of which consists of interacting genes with coordinated expression patterns, but whose activation level is significantly different in the wild type and the mutant. The predicted modules were comprised of functionally coherent genes and topologically cohesive. Furthermore, they contained several orthologs of known pathogenic genes in other fungi, which may play important roles in the fungal pathogenesis.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Bahadorinejad:2018:OFD, author = "Arghavan Bahadorinejad and Ulisses M. Braga-Neto", title = "Optimal Fault Detection and Diagnosis in Transcriptional Circuits Using Next-Generation Sequencing", journal = j-TCBB, volume = "15", number = "2", pages = "516--525", month = mar, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2404819", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Apr 7 18:55:55 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We propose a methodology for model-based fault detection and diagnosis for stochastic Boolean dynamical systems indirectly observed through a single time series of transcriptomic measurements using Next Generation Sequencing NGS data. The fault detection consists of an innovations filter followed by a fault certification step, and requires no knowledge about the possible system faults. The innovations filter uses the optimal Boolean state estimator, called the Boolean Kalman Filter BKF. In the presence of knowledge about the possible system faults, we propose an additional step of fault diagnosis based on a multiple model adaptive estimation MMAE method consisting of a bank of BKFs running in parallel. 
Performance is assessed by means of false detection and misdiagnosis rates, as well as average times until correct detection and diagnosis. The efficacy of the proposed methodology is demonstrated via numerical experiments using a p53-MDM2 negative feedback loop Boolean network with stuck-at faults that model molecular events commonly found in cancer.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Cui:2018:MND, author = "Xiaodong Cui and Lin Zhang and Jia Meng and Manjeet K. Rao and Yidong Chen and Yufei Huang", title = "{MeTDiff}: a Novel Differential {RNA} Methylation Analysis for {MeRIP-Seq} Data", journal = j-TCBB, volume = "15", number = "2", pages = "526--534", month = mar, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2403355", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Apr 7 18:55:55 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "N6-Methyladenosine (m6A) transcriptome methylation is an exciting new research area that just captures the attention of research community. We present in this paper, MeTDiff, a novel computational tool for predicting differential m6A methylation sites from Methylated RNA immunoprecipitation sequencing (MeRIP-Seq) data. Compared with the existing algorithm exomePeak, the advantages of MeTDiff are that it explicitly models the reads variation in data and also devises a more powerful likelihood ratio test for differential methylation site prediction. Comprehensive evaluation of MeTDiff's performance using both simulated and real datasets showed that MeTDiff is much more robust and achieved much higher sensitivity and specificity over exomePeak.
The R package ``MeTDiff'' and additional details are available at: https://github.com/compgenomics/MeTDiff.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhao:2018:BMV, author = "Yize Zhao and Jian Kang and Qi Long", title = "{Bayesian} Multiresolution Variable Selection for Ultra-High Dimensional Neuroimaging Data", journal = j-TCBB, volume = "15", number = "2", pages = "537--550", month = mar, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2440244", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Apr 7 18:55:55 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Ultra-high dimensional variable selection has become increasingly important in analysis of neuroimaging data. For example, in the Autism Brain Imaging Data Exchange (ABIDE) study, neuroscientists are interested in identifying important biomarkers for early detection of the autism spectrum disorder (ASD) using high resolution brain images that include hundreds of thousands of voxels. However, most existing methods are not feasible for solving this problem due to their extensive computational costs. In this work, we propose a novel multiresolution variable selection procedure under a Bayesian probit regression framework. It recursively uses posterior samples for coarser-scale variable selection to guide the posterior inference on finer-scale variable selection, leading to very efficient Markov chain Monte Carlo (MCMC) algorithms. The proposed algorithms are computationally feasible for ultra-high dimensional data. Also, our model incorporates two levels of structural information into variable selection using Ising priors: the spatial dependence between voxels and the functional connectivity between anatomical brain regions.
Applied to the resting state functional magnetic resonance imaging R-fMRI data in the ABIDE study, our methods identify voxel-level imaging biomarkers highly predictive of the ASD, which are biologically meaningful and interpretable. Extensive simulations also show that our methods achieve better performance in variable selection compared to existing methods.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Yuan:2018:GIH, author = "Lin Yuan and Fanglin Chen and Ling-Li Zeng and Lubin Wang and Dewen Hu", title = "Gender Identification of Human Brain Image with a Novel {$3$D} Descriptor", journal = j-TCBB, volume = "15", number = "2", pages = "551--561", month = mar, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2448081", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Apr 7 18:55:55 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Determining gender by examining the human brain is not a simple task because the spatial structure of the human brain is complex, and no obvious differences can be seen by the naked eyes. In this paper, we propose a novel three-dimensional feature descriptor, the three-dimensional weighted histogram of gradient orientation 3D WHGO to describe this complex spatial structure. The descriptor combines local information for signal intensity and global three-dimensional spatial information for the whole brain. We also improve a framework to address the classification of three-dimensional images based on MRI. This framework, three-dimensional spatial pyramid, uses additional information regarding the spatial relationship between features. The proposed method can be used to distinguish gender at the individual level. 
We examine our method by using the gender identification of individual magnetic resonance imaging MRI scans of a large sample of healthy adults across four research sites, resulting in up to individual-level accuracies under the optimized parameters for distinguishing between females and males. Compared with previous methods, the proposed method obtains higher accuracy, which suggests that this technology has higher discriminative power. With its improved performance in gender identification, the proposed method may have the potential to inform clinical practice and aid in research on neurological and psychiatric disorders.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Hu:2018:CBG, author = "Meng Hu and Wu Li and Hualou Liang", title = "A Copula-Based {Granger} Causality Measure for the Analysis of Neural Spike Train Data", journal = j-TCBB, volume = "15", number = "2", pages = "562--569", month = mar, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2388311", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Apr 7 18:55:55 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In systems neuroscience, it is becoming increasingly common to record the activity of hundreds of neurons simultaneously via electrode arrays. The ability to accurately measure the causal interactions among multiple neurons in the brain is crucial to understanding how neurons work in concert to generate specific brain functions. The development of new statistical methods for assessing causal influence between spike trains is still an active field of neuroscience research. Here, we suggest a copula-based Granger causality measure for the analysis of neural spike train data. 
This method is built upon our recent work on copula
                 Granger causality for the analysis of continuous-valued
                 time series by extending it to point-process neural
                 spike train data. The proposed method is therefore able
                 to reveal nonlinear and high-order causality in the
                 spike trains while retaining all the computational
                 advantages such as model-free, efficient estimation,
                 and variability assessment of Granger causality. The
                 performance of our algorithm can be further boosted
                 with time-reversed data. Our method performed well on
                 extensive simulations, and was then demonstrated on
                 neural activity simultaneously recorded from primary
                 visual cortex of a monkey performing a contour
                 detection task.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Zhang:2018:CTC,
  author =       "Lijun Zhang and Ming Wang and Nicholas W. Sterling and
                 Eun-Young Lee and Paul J. Eslinger and Daymond Wagner
                 and Guangwei Du and Mechelle M. Lewis and Young Truong
                 and F. DuBois Bowman and Xuemei Huang",
  title =        "Cortical Thinning and Cognitive Impairment in
                 {Parkinson}'s Disease without Dementia",
  journal =      j-TCBB,
  volume =       "15",
  number =       "2",
  pages =        "570--580",
  month =        mar,
  year =         "2018",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2015.2465951",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sat Apr 7 18:55:55 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Parkinson's disease (PD) is a progressive
                 neurodegenerative disorder characterized clinically by
                 motor dysfunction (bradykinesia, rigidity, tremor, and
                 postural instability), and pathologically by the loss
                 of dopaminergic neurons in the substantia nigra of the
                 basal ganglia. Growing literature supports that
                 cognitive deficits may also be present in PD, even in
                 non-demented patients. Gray matter (GM) atrophy has
                 been reported in PD and may be related to cognitive
                 decline. This study investigated cortical thickness in
                 non-demented PD subjects and elucidated its
                 relationship to cognitive impairment using
                 high-resolution T1-weighted brain MRI and comprehensive
                 cognitive function scores from 71 non-demented PD and
                 48 control subjects matched for age, gender, and
                 education. Cortical thickness was compared between
                 groups using a flexible hierarchical multivariate
                 Bayesian model, which accounts for correlations between
                 brain regions. Correlation analyses were performed
                 among brain areas and cognitive domains as well, which
                 showed significant group differences in the PD
                 population. Compared to Controls, PD subjects
                 demonstrated significant age-adjusted cortical thinning
                 predominantly in inferior and superior parietal areas
                 and extended to superior frontal, superior temporal,
                 and precuneus areas (posterior probability $ > 0.9 $).
                 Cortical thinning was also found in the left precentral
                 and lateral occipital, and right postcentral, middle
                 frontal, and fusiform regions (posterior probability
                 $ > 0.9 $). PD patients showed significantly reduced
                 cognitive performance in executive function, including
                 set shifting ($ p = 0.005 $) and spontaneous
                 flexibility ($ p = 0.02 $), which were associated with
                 the above cortical thinning regions ($ p < 0.05 $).",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Sato:2018:CNM,
  author =       "Joao Ricardo Sato and Maciel Calebe Vidal and Suzana
                 de Siqueira Santos and Katlin Brauer Massirer and Andre
                 Fujita",
  title =        "Complex Network Measures in Autism Spectrum
                 Disorders",
  journal =      j-TCBB,
  volume =       "15",
  number =       "2",
  pages =        "581--587",
  month =        mar,
  year =         "2018",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2015.2476787",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sat Apr 7 18:55:55 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Recent studies have suggested abnormal brain network
                 organization in subjects with Autism Spectrum Disorders
                 (ASD). Here we applied spectral clustering algorithm,
                 diverse centrality measures (betweenness (BC),
                 clustering (CC), eigenvector (EC), and degree (DC)),
                 and also the network entropy (NE) to identify brain
                 sub-systems associated with ASD. We have found that BC
                 increases in the following ASD clusters: in the
                 somatomotor, default-mode, cerebellar, and
                 fronto-parietal. On the other hand, CC, EC, and DC
                 decrease in the somatomotor, default-mode, and
                 cerebellar clusters. Additionally, NE decreases in ASD
                 in the cerebellar cluster. These findings reinforce the
                 hypothesis of under-connectivity in ASD and suggest
                 that the difference in the network organization is more
                 prominent in the cerebellar system. The cerebellar
                 cluster presents reduced NE in ASD, which relates to a
                 more regular organization of the networks.
These results might be important to improve current
                 understanding about the etiological processes and the
                 development of potential tools supporting diagnosis and
                 therapeutic interventions.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Handl:2018:AAD,
  author =       "Julia Handl and Amarda Shehu and Jose {Santos Reyes}",
  title =        "Advances in the Application and Development of
                 Non-Linear Global Optimization Techniques in
                 Computational Structural Biology",
  journal =      j-TCBB,
  volume =       "15",
  number =       "3",
  pages =        "688--689",
  month =        may,
  year =         "2018",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2018.2817267",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sat Jun 30 09:34:37 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Correa:2018:MAP,
  author =       "Leonardo Correa and Bruno Borguesan and Camilo Farfan
                 and Mario Inostroza-Ponta and Marcio Dorn",
  title =        "A Memetic Algorithm for {$3$D} Protein Structure
                 Prediction Problem",
  journal =      j-TCBB,
  volume =       "15",
  number =       "3",
  pages =        "690--704",
  month =        may,
  year =         "2018",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2016.2635143",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sat Jun 30 09:34:37 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Memetic Algorithms are population-based metaheuristics
                 intrinsically concerned with exploiting all available
                 knowledge about the problem under study. The
                 incorporation of problem domain knowledge is not an
                 optional mechanism, but a fundamental feature of the
                 Memetic Algorithms. In this paper, we present a Memetic
                 Algorithm to tackle the three-dimensional protein
                 structure prediction problem. The method uses a
                 structured population and incorporates a Simulated
                 Annealing algorithm as a local search strategy, as well
                 as ad-hoc crossover and mutation operators to deal with
                 the problem. It takes advantage of structural knowledge
                 stored in the Protein Data Bank, by using an Angle
                 Probability List that helps to reduce the search space
                 and to guide the search strategy. The proposed
                 algorithm was tested on 19 protein sequences of amino
                 acid residues, and the results show the ability of the
                 algorithm to find native-like protein structures.
                 Experimental results have revealed that the proposed
                 algorithm can find good solutions regarding
                 root-mean-square deviation and global distance total
                 score test in comparison with the experimental protein
                 structures. We also show that our results are
                 comparable in terms of folding organization with
                 state-of-the-art prediction methods, corroborating the
                 effectiveness of our proposal.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Huang:2018:GNA,
  author =       "Jiaxiang Huang and Maoguo Gong and Lijia Ma",
  title =        "A Global Network Alignment Method Using Discrete
                 Particle Swarm Optimization",
  journal =      j-TCBB,
  volume =       "15",
  number =       "3",
  pages =        "705--718",
  month =        may,
  year =         "2018",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2016.2618380",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sat Jun 30 09:34:37 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Molecular interactions data increase exponentially
                 with the advance of biotechnology. This makes it
                 possible and necessary to comparatively analyze the
                 different data at a network level.
Global network alignment is an important network
                 comparison approach to identify conserved subnetworks
                 and get insight into evolutionary relationship across
                 species. Network alignment which is analogous to
                 subgraph isomorphism is known to be an NP-hard problem.
                 In this paper, we introduce a novel heuristic
                 Particle-Swarm-Optimization based Network Aligner
                 (PSONA), which optimizes a weighted global alignment
                 model considering both protein sequence similarity and
                 interaction conservations. The particle statuses and
                 status updating rules are redefined in a discrete form
                 by using permutation. A seed-and-extend strategy is
                 employed to guide the searching for the superior
                 alignment. The proposed initialization method ``seeds''
                 matches with high sequence similarity into the
                 alignment, which guarantees the functional coherence of
                 the mapping nodes. A greedy local search method is
                 designed as the ``extension'' procedure to iteratively
                 optimize the edge conservations. PSONA is compared with
                 several state-of-art methods on ten network pairs
                 combined by five species. The experimental results
                 demonstrate that the proposed aligner can map the
                 proteins with high functional coherence and can be used
                 as a booster to effectively refine the well-studied
                 aligners.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Sapin:2018:OME,
  author =       "Emmanuel Sapin and Kenneth A. {De Jong} and Amarda
                 Shehu",
  title =        "From Optimization to Mapping: an Evolutionary
                 Algorithm for Protein Energy Landscapes",
  journal =      j-TCBB,
  volume =       "15",
  number =       "3",
  pages =        "719--731",
  month =        may,
  year =         "2018",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2016.2628745",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sat Jun 30 09:34:37 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Stochastic search is often the only viable option to
                 address complex optimization problems. Recently,
                 evolutionary algorithms have been shown to handle
                 challenging continuous optimization problems related to
                 protein structure modeling. Building on recent work in
                 our laboratories, we propose an evolutionary algorithm
                 for efficiently mapping the multi-basin energy
                 landscapes of dynamic proteins that switch between
                 thermodynamically stable or semi-stable structural
                 states to regulate their biological activity in the
                 cell. The proposed algorithm balances computational
                 resources between exploration and exploitation of the
                 nonlinear, multimodal landscapes that characterize
                 multi-state proteins via a novel combination of global
                 and local search to generate a dynamically-updated,
                 information-rich map of a protein's energy landscape.
                 This new mapping-oriented EA is applied to several
                 dynamic proteins and their disease-implicated variants
                 to illustrate its ability to map complex energy
                 landscapes in a computationally feasible manner. We
                 further show that, given the availability of such maps,
                 comparison between the maps of wildtype and variants of
                 a protein allows for the formulation of a structural
                 and thermodynamic basis for the impact of sequence
                 mutations on dysfunction that may prove useful in
                 guiding further wet-laboratory investigations of
                 dysfunction and molecular interventions.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Rydzewski:2018:CSB,
  author =       "Jakub Rydzewski and Rafal Jakubowski and Giuseppe
                 Nicosia and Wieslaw Nowak",
  title =        "Conformational Sampling of a Biomolecular Rugged
                 Energy Landscape",
  journal =      j-TCBB,
  volume =       "15",
  number =       "3",
  pages =        "732--739",
  month =        may,
  year =         "2018",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2016.2634008",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sat Jun 30 09:34:37 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "The protein structure refinement using conformational
                 sampling is important in hitherto protein studies. In
                 this paper, we examined the protein structure
                 refinement by means of potential energy minimization
                 using immune computing as a method of sampling
                 conformations. The method was tested on the x-ray
                 structure and 30 decoys of the mutant of
                 [Leu]Enkephalin, a paradigmatic example of the
                 biomolecular multiple-minima problem. In order to score
                 the refined conformations, we used a standard potential
                 energy function with the OPLSAA force field. The
                 effectiveness of the search was assessed using a
                 variety of methods. The robustness of sampling was
                 checked by the energy yield function which measures
                 quantitatively the number of the peptide decoys
                 residing in an energetic funnel.
Furthermore, the potential energy-dependent Pareto
                 fronts were calculated to elucidate dissimilarities
                 between peptide conformations and the native state as
                 observed by x-ray crystallography. Our results showed
                 that the probed potential energy landscape of
                 [Leu]Enkephalin is self-similar on different metric
                 scales and that the local potential energy minima of
                 the peptide decoys are metastable, thus they can be
                 refined to conformations whose potential energy is
                 decreased by approximately $-$250 kJ/mol.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Leinweber:2018:GBP,
  author =       "Matthias Leinweber and Thomas Fober and Bernd
                 Freisleben",
  title =        "{GPU-Based} Point Cloud Superpositioning for
                 Structural Comparisons of Protein Binding Sites",
  journal =      j-TCBB,
  volume =       "15",
  number =       "3",
  pages =        "740--752",
  month =        may,
  year =         "2018",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2016.2625793",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sat Jun 30 09:34:37 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "In this paper, we present a novel approach to solve
                 the labeled point cloud superpositioning problem for
                 performing structural comparisons of protein binding
                 sites. The solution is based on a parallel evolution
                 strategy that operates on large populations and runs on
                 GPU hardware. The proposed evolution strategy reduces
                 the likelihood of getting stuck in a local optimum of
                 the multimodal real-valued optimization problem
                 represented by labeled point cloud superpositioning.
                 The performance of the GPU-based parallel evolution
                 strategy is compared to a previously proposed CPU-based
                 sequential approach for labeled point cloud
                 superpositioning, indicating that the GPU-based
                 parallel evolution strategy leads to qualitatively
                 better results and significantly shorter runtimes, with
                 speed improvements of up to a factor of 1,500 for large
                 populations. Binary classification tests based on the
                 ATP, NADH, and FAD protein subsets of CavBase, a
                 database containing putative binding sites, show
                 average classification rate improvements from about 92
                 percent (CPU) to 96 percent (GPU). Further experiments
                 indicate that the proposed GPU-based labeled point
                 cloud superpositioning approach can be superior to
                 traditional protein comparison approaches based on
                 sequence alignments.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Zhou:2018:BNR,
  author =       "Jie Zhou and Yuan-Yuan Shi",
  title =        "A Bipartite Network and Resource Transfer-Based
                 Approach to Infer {lncRNA}-Environmental Factor
                 Associations",
  journal =      j-TCBB,
  volume =       "15",
  number =       "3",
  pages =        "753--759",
  month =        may,
  year =         "2018",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2017.2695187",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sat Jun 30 09:34:37 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Phenotypes and diseases are often determined by the
                 complex interactions between genetic factors and
                 environmental factors (EFs). However, compared with
                 protein-coding genes and microRNAs, there is a paucity
                 of computational methods for understanding the
                 associations between long non-coding RNAs (lncRNAs) and
                 EFs. In this study, we focused on the associations
                 between lncRNA and EFs.
By using the common miRNA partners of any pair of
                 lncRNA and EF, based on the competing endogenous RNA
                 (ceRNA) hypothesis and the technique of resources
                 transfer within the experimentally-supported
                 lncRNA-miRNA and miRNA-EF association bipartite
                 networks, we propose an algorithm for predicting new
                 lncRNA-EF associations. Results show that, compared
                 with another recently-proposed method, our approach is
                 capable of predicting more credible lncRNA-EF
                 associations. These results support the validity of our
                 approach to predict biologically significant
                 associations, which could lead to a better
                 understanding of the molecular processes.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Pesonen:2018:CPN,
  author =       "Maiju Pesonen and Jaakko Nevalainen and Steven Potter
                 and Somnath Datta and Susmita Datta",
  title =        "A Combined {PLS} and Negative Binomial Regression
                 Model for Inferring Association Networks from
                 Next-Generation Sequencing Count Data",
  journal =      j-TCBB,
  volume =       "15",
  number =       "3",
  pages =        "760--773",
  month =        may,
  year =         "2018",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2017.2665495",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sat Jun 30 09:34:37 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "A major challenge of genomics data is to detect
                 interactions displaying functional associations from
                 large-scale observations. In this study, a new
                 cPLS-algorithm combining partial least squares approach
                 with negative binomial regression is suggested to
                 reconstruct a genomic association network for
                 high-dimensional next-generation sequencing count data.
                 The suggested approach is applicable to the raw counts
                 data, without requiring any further pre-processing
                 steps. In the settings investigated, the cPLS-algorithm
                 outperformed the two widely used comparative methods,
                 graphical lasso, and weighted correlation network
                 analysis. In addition, cPLS is able to estimate the
                 full network for thousands of genes without major
                 computational load. Finally, we demonstrate that cPLS
                 is capable of finding biologically meaningful
                 associations by analyzing an example data set from a
                 previously published study to examine the molecular
                 anatomy of the craniofacial development.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Li:2018:CSI,
  author =       "Xiangtao Li and Ka-Chun Wong",
  title =        "A Comparative Study for Identifying the
                 Chromosome-Wide Spatial Clusters from High-Throughput
                 Chromatin Conformation Capture Data",
  journal =      j-TCBB,
  volume =       "15",
  number =       "3",
  pages =        "774--787",
  month =        may,
  year =         "2018",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2017.2684800",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sat Jun 30 09:34:37 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "In the past years, the high-throughput sequencing
                 technologies have enabled massive insights into genomic
                 annotations. In contrast, the full-scale
                 three-dimensional arrangements of genomic regions are
                 relatively unknown. Thanks to the recent breakthroughs
                 in High-throughput Chromosome Conformation Capture
                 (Hi-C) techniques, non-negative matrix factorization
                 (NMF) has been adopted to identify local spatial
                 clusters of genomic regions from Hi-C data. However,
                 such non-negative matrix factorization entails a
                 high-dimensional non-convex objective function to be
                 optimized with non-negative constraints. We propose and
                 compare more than ten optimization algorithms to
                 improve the identification of local spatial clusters
                 via NMF.
To circumvent and optimize the high-dimensional,
                 non-convex, and constrained objective function, we draw
                 inspiration from the nature to perform in silico
                 evolution. The proposed algorithms consist of a
                 population of candidates to be evolved while the NMF
                 acts as local search during the evolutions. The
                 population based optimization algorithm coordinates and
                 guides the non-negative matrix factorization toward
                 global optima. Experimental results show that the
                 proposed algorithms can improve the quality of
                 non-negative matrix factorization over the recent
                 state-of-the-arts. The effectiveness and robustness of
                 the proposed algorithms are supported by comprehensive
                 performance benchmarking on chromosome-wide Hi-C
                 contact maps of yeast and human. In addition, time
                 complexity analysis, convergence analysis, parameter
                 analysis, biological case studies, and gene ontology
                 similarity analysis are conducted to demonstrate the
                 robustness of the proposed methods from different
                 perspectives.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Peng:2018:JTL,
  author =       "Xin Peng and Yang Tang and Wangli He and Wenli Du and
                 Feng Qian",
  title =        "A Just-in-Time Learning Based Monitoring and
                 Classification Method for Hyper\slash Hypocalcemia
                 Diagnosis",
  journal =      j-TCBB,
  volume =       "15",
  number =       "3",
  pages =        "788--801",
  month =        may,
  year =         "2018",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2017.2655522",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sat Jun 30 09:34:37 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "This study focuses on the classification and
                 pathological status monitoring of hyper/hypo-calcemia
                 in the calcium regulatory system. By utilizing the
                 Independent Component Analysis (ICA) mixture model,
                 samples from healthy patients are collected, diagnosed,
                 and subsequently classified according to their
                 underlying behaviors, characteristics, and mechanisms.
                 Then, a Just-in-Time Learning (JITL) has been employed
                 in order to estimate the diseased status dynamically.
                 In terms of JITL, for the purpose of the construction
                 of an appropriate similarity index to identify relevant
                 datasets, a novel similarity index based on the ICA
                 mixture model is proposed in this paper to improve
                 online model quality. The validity and effectiveness of
                 the proposed approach have been demonstrated by
                 applying it to the calcium regulatory system under
                 various hypocalcemic and hypercalcemic diseased
                 conditions.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Zhou:2018:STM,
  author =       "Xichuan Zhou and Fan Yang and Yujie Feng and Qin Li
                 and Fang Tang and Shengdong Hu and Zhi Lin and Lei
                 Zhang",
  title =        "A Spatial-Temporal Method to Detect Global Influenza
                 Epidemics Using Heterogeneous Data Collected from the
                 {Internet}",
  journal =      j-TCBB,
  volume =       "15",
  number =       "3",
  pages =        "802--812",
  month =        may,
  year =         "2018",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2017.2690631",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sat Jun 30 09:34:37 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "The 2009 influenza pandemic teaches us how fast the
                 influenza virus could spread globally within a short
                 period of time. To address the challenge of timely
                 global influenza surveillance, this paper presents a
                 spatial-temporal method that incorporates heterogeneous
                 data collected from the Internet to detect influenza
                 epidemics in real time.
Specifically, the influenza morbidity data, the
                 influenza-related Google query data and news data, and
                 the international air transportation data are
                 integrated in a multivariate hidden Markov model, which
                 is designed to describe the intrinsic
                 temporal-geographical correlation of influenza
                 transmission for surveillance purpose. Respective
                 models are built for 106 countries and regions in the
                 world. Despite that the WHO morbidity data are not
                 always available for most countries, the proposed
                 method achieves 90.26 to 97.10 percent accuracy on
                 average for real-time detection of global influenza
                 epidemics during the period from January 2005 to
                 December 2015. Moreover, experiment shows that, the
                 proposed method could even predict an influenza
                 epidemic before it occurs with 89.20 percent accuracy
                 on average. Timely international surveillance results
                 may help the authorities to prevent and control the
                 influenza disease at the early stage of a global
                 influenza pandemic.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Avcu:2018:ACM,
  author =       "Neslihan Avcu and Nihal Pekergin and Ferhan Pekergin
                 and Cuneyt Guzelis",
  title =        "Aggregation for Computing Multi-Modal Stationary
                 Distributions in {$1$-D} Gene Regulatory Networks",
  journal =      j-TCBB,
  volume =       "15",
  number =       "3",
  pages =        "813--827",
  month =        may,
  year =         "2018",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2017.2699177",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sat Jun 30 09:34:37 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "This paper proposes aggregation-based, three-stage
                 algorithms to overcome the numerical problems
                 encountered in computing stationary distributions and
                 mean first passage times for multi-modal birth-death
                 processes of large state space sizes. The considered
                 birth-death processes which are defined by Chemical
                 Master Equations are used in modeling stochastic
                 behavior of gene regulatory networks. Computing
                 stationary probabilities for a multi-modal distribution
                 from Chemical Master Equations is subject to have
                 numerical problems due to the probability values
                 running out of the representation range of the standard
                 programming languages with the increasing size of the
                 state space. The aggregation is shown to provide a
                 solution to this problem by analyzing first reduced
                 size subsystems in isolation and then considering the
                 transitions between these subsystems. The proposed
                 algorithms are applied to study the bimodal behavior of
                 the lac operon of E. coli described with a
                 one-dimensional birth-death model. Thus, the
                 determination of the entire parameter range of
                 bimodality for the stochastic model of lac operon is
                 achieved.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Shao:2018:OCG,
  author =       "Wei Shao and Mingxia Liu and Ying-Ying Xu and Hong-Bin
                 Shen and Daoqiang Zhang",
  title =        "An Organelle Correlation-Guided Feature Selection
                 Approach for Classifying Multi-Label Subcellular
                 Bio-Images",
  journal =      j-TCBB,
  volume =       "15",
  number =       "3",
  pages =        "828--838",
  month =        may,
  year =         "2018",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2017.2677907",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sat Jun 30 09:34:37 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Nowadays, with the advances in microscopic imaging,
                 accurate classification of bioimage-based protein
                 subcellular location pattern has attracted as much
                 attention as ever. One of the basic challenging
                 problems is how to select the useful feature components
                 among thousands of potential features to describe the
                 images.
This is not an easy task especially considering there
                 is a high ratio of multi-location proteins. Existing
                 feature selection methods seldom take the correlation
                 among different cellular compartments into
                 consideration, and thus may miss some features that
                 will be co-important for several subcellular locations.
                 To deal with this problem, we make use of the important
                 structural correlation among different cellular
                 compartments and propose an organelle structural
                 correlation regularized feature selection method CSF
                 (Common-Sets of Features) in this paper. We formulate
                 the multi-label classification problem by adopting a
                 group-sparsity regularizer to select common subsets of
                 relevant features from different cellular compartments.
                 In addition, we also add a cell structural correlation
                 regularized Laplacian term, which utilizes the prior
                 biological structural information to capture the
                 intrinsic dependency among different cellular
                 compartments. The CSF provides a new feature selection
                 strategy for multi-label bio-image subcellular pattern
                 classifications, and the experimental results also show
                 its superiority when comparing with several existing
                 algorithms.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Dutta:2018:ASS,
  author =       "Pritha Dutta and Subhadip Basu and Mahantapas Kundu",
  title =        "Assessment of Semantic Similarity between Proteins
                 Using Information Content and Topological Properties of
                 the Gene Ontology Graph",
  journal =      j-TCBB,
  volume =       "15",
  number =       "3",
  pages =        "839--849",
  month =        may,
  year =         "2018",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2017.2689762",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sat Jun 30 09:34:37 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "The semantic similarity between two interacting
                 proteins can be
estimated by combining the similarity scores of the GO terms associated with the proteins. Greater number of similar GO annotations between two proteins indicates greater interaction affinity. Existing semantic similarity measures make use of the GO graph structure, the information content of GO terms, or a combination of both. In this paper, we present a hybrid approach which utilizes both the topological features of the GO graph and information contents of the GO terms. More specifically, we 1 consider a fuzzy clustering of the GO graph based on the level of association of the GO terms, 2 estimate the GO term memberships to each cluster center based on the respective shortest path lengths, and 3 assign weightage to GO term pairs on the basis of their dissimilarity with respect to the cluster centers. We test the performance of our semantic similarity measure against seven other previously published similarity measures using benchmark protein-protein interaction datasets of Homo sapiens and Saccharomyces cerevisiae based on sequence similarity, Pfam similarity, area under ROC curve, and $ F_1 $ measure.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Pirayre:2018:BCC, author = "Aurelie Pirayre and Camille Couprie and Laurent Duval and Jean-Christophe Pesquet", title = "{BRANE Clust}: Cluster-Assisted Gene Regulatory Network Inference Refinement", journal = j-TCBB, volume = "15", number = "3", pages = "850--860", month = may, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2688355", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jun 30 09:34:37 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Discovering meaningful gene interactions is crucial for the identification of novel regulatory processes in cells. 
Building accurately the related graphs remains challenging due to the large number of possible solutions from available data. Nonetheless, enforcing a priori on the graph structure, such as modularity, may reduce network indeterminacy issues. BRANE Clust (Biologically-Related A priori Network Enhancement with Clustering) refines gene regulatory network (GRN) inference thanks to cluster information. It works as a post-processing tool for inference methods (i.e., CLR, GENIE3). In BRANE Clust, the clustering is based on the inversion of a system of linear equations involving a graph-Laplacian matrix promoting a modular structure. Our approach is validated on DREAM4 and DREAM5 datasets with objective measures, showing significant comparative improvements. We provide additional insights on the discovery of novel regulatory or co-expressed links in the inferred Escherichia coli network evaluated using the STRING database. The comparative pertinence of clustering is discussed computationally (SIMoNe, WGCNA, X-means) and biologically (RegulonDB). 
BRANE Clust software is available at: http://www-syscom.univ-mlv.fr/~pirayre/Codes-GRN-BRANE-clust.html.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Hashem:2018:CML, author = "Somaya Hashem and Gamal Esmat and Wafaa Elakel and Shahira Habashy and Safaa Abdel Raouf and Mohamed Elhefnawi and Mohamed Eladawy and Mahmoud ElHefnawi", title = "Comparison of Machine Learning Approaches for Prediction of Advanced Liver Fibrosis in Chronic Hepatitis {C} Patients", journal = j-TCBB, volume = "15", number = "3", pages = "861--868", month = may, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2690848", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jun 30 09:34:37 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Background/Aim: Using machine learning approaches as non-invasive methods have been used recently as an alternative method in staging chronic liver diseases for avoiding the drawbacks of biopsy. This study aims to evaluate different machine learning techniques in prediction of advanced fibrosis by combining the serum bio-markers and clinical information to develop the classification models. Methods: A prospective cohort of 39,567 patients with chronic hepatitis C was divided into two sets---one categorized as mild to moderate fibrosis (F0-F2), and the other categorized as advanced fibrosis (F3-F4) according to METAVIR score. Decision tree, genetic algorithm, particle swarm optimization, and multi-linear regression models for advanced fibrosis risk prediction were developed. Receiver operating characteristic curve analysis was performed to evaluate the performance of the proposed models. Results: Age, platelet count, AST, and albumin were found to be statistically significant to advanced fibrosis. 
The machine learning algorithms under study were able to predict advanced fibrosis in patients with HCC with AUROC ranging between 0.73 and 0.76 and accuracy between 66.3 and 84.4 percent. Conclusions: Machine-learning approaches could be used as alternative methods in prediction of the risk of advanced liver fibrosis due to chronic hepatitis C.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ray:2018:DPM, author = "Sumanta Ray and Ujjwal Maulik", title = "Discovering Perturbation of Modular Structure in {HIV} Progression by Integrating Multiple Data Sources Through Non-Negative Matrix Factorization", journal = j-TCBB, volume = "15", number = "3", pages = "869--877", month = may, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2642184", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jun 30 09:34:37 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Detecting perturbation in modular structure during HIV-1 disease progression is an important step to understand stage specific infection pattern of HIV-1 virus in human cell. In this article, we proposed a novel methodology on integration of multiple biological information to identify such disruption in human gene module during different stages of HIV-1 infection. We integrate three different biological information: gene expression information, protein-protein interaction information, and gene ontology information in single gene meta-module, through non negative matrix factorization (NMF). As the identified meta-modules inherit those information so, detecting perturbation of these, reflects the changes in expression pattern, in PPI structure and in functional similarity of genes during the infection progression. 
To integrate modules of different data sources into strong meta-modules, NMF based clustering is utilized here. Perturbation in meta-modular structure is identified by investigating the topological and intramodular properties and putting rank to those meta-modules using a rank aggregation algorithm. We have also analyzed the preservation structure of significant GO terms in which the human proteins of the meta-modules participate. Moreover, we have performed an analysis to show the change of coregulation pattern of identified transcription factors (TFs) over the HIV progression stages.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Awdeh:2018:DEA, author = "Aseel Awdeh and Hilary Phenix and Mads Karn and Theodore J. Perkins", title = "Dynamics in Epistasis Analysis", journal = j-TCBB, volume = "15", number = "3", pages = "878--891", month = may, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2653110", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jun 30 09:34:37 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Finding regulatory relationships between genes, including the direction and nature of influence between them, is a fundamental challenge in the field of molecular genetics. One classical approach to this problem is epistasis analysis. Broadly speaking, epistasis analysis infers the regulatory relationships between a pair of genes in a genetic pathway by considering the patterns of change in an observable trait resulting from single and double deletion of genes. While classical epistasis analysis has yielded deep insights on numerous genetic pathways, it is not without limitations. 
Here, we explore the possibility of dynamic epistasis analysis, in which, in addition to performing genetic perturbations of a pathway, we drive the pathway by a time-varying upstream signal. We explore the theoretical power of dynamical epistasis analysis by conducting an identifiability analysis of Boolean models of genetic pathways, comparing static and dynamic approaches. We find that even relatively simple input dynamics greatly increases the power of epistasis analysis to discriminate alternative network structures. Further, we explore the question of experiment design, and show that a subset of short time-varying signals, which we call dynamic primitives, allow maximum discriminative power with a reduced number of experiments.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{He:2018:EGC, author = "Tiantian He and Keith C. C. Chan", title = "Evolutionary Graph Clustering for Protein Complex Identification", journal = j-TCBB, volume = "15", number = "3", pages = "892--904", month = may, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2642107", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jun 30 09:34:37 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This paper presents a graph clustering algorithm, called EGCPI, to discover protein complexes in protein-protein interaction (PPI) networks. In performing its task, EGCPI takes into consideration both network topologies and attributes of interacting proteins, both of which have been shown to be important for protein complex discovery. EGCPI formulates the problem as an optimization problem and tackles it with evolutionary clustering. Given a PPI network, EGCPI first annotates each protein with corresponding attributes that are provided in Gene Ontology database. 
It then adopts a similarity measure to evaluate how similar the connected proteins are taking into consideration the network topology. Given this measure, EGCPI then discovers a number of graph clusters within which proteins are densely connected, based on an evolutionary strategy. At last, EGCPI identifies protein complexes in each discovered cluster based on the homogeneity of attributes performed by pairwise proteins. EGCPI has been tested with several real data sets and the experimental results show EGCPI is very effective on protein complex discovery, and the evolutionary clustering is helpful to identify protein complexes in PPI networks. The software of EGCPI can be downloaded via: https://github.com/hetiantian1985/EGCPI.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ikram:2018:ICB, author = "Najmul Ikram and Muhammad Abdul Qadir and Muhammad Tanvir Afzal", title = "Investigating Correlation between Protein Sequence Similarity and Semantic Similarity Using {Gene Ontology} Annotations", journal = j-TCBB, volume = "15", number = "3", pages = "905--912", month = may, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2695542", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jun 30 09:34:37 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Sequence similarity is a commonly used measure to compare proteins. With the increasing use of ontologies, semantic function similarity is getting importance. The correlation between these measures has been applied in the evaluation of new semantic similarity methods, and in protein function prediction. In this research, we investigate the relationship between the two similarity methods. The results suggest absence of a strong correlation between sequence and semantic similarities. 
There is a large number of proteins with low sequence similarity and high semantic similarity. We observe that Pearson's correlation coefficient is not sufficient to explain the nature of this relationship. Interestingly, the term semantic similarity values above 0 and below 1 do not seem to play a role in improving the correlation. That is, the correlation coefficient depends only on the number of common GO terms in proteins under comparison, and the semantic similarity measurement method does not influence it. Semantic similarity and sequence similarity have a distinct behavior. These findings are of significant effect for future works on protein comparison, and will help understand the semantic similarity between proteins in a better way.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhu:2018:LLM, author = "Lin Zhu and Hong-Bo Zhang and De-Shuang Huang", title = "{LMMO}: a Large Margin Approach for Refining Regulatory Motifs", journal = j-TCBB, volume = "15", number = "3", pages = "913--925", month = may, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2691325", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jun 30 09:34:37 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Although discriminative motif discovery (DMD) methods are promising for eliciting motifs from high-throughput experimental data, they usually have to sacrifice accuracy and may fail to fully leverage the potential of large datasets. Recently, it has been demonstrated that the motifs identified by DMDs can be significantly improved by maximizing the receiver-operating characteristic curve (AUC) metric, which has been widely used in the literature to rank the performance of elicited motifs. 
However, existing approaches for motif refinement choose to directly maximize the non-convex and discontinuous AUC itself, which is known to be difficult and may lead to suboptimal solutions. In this paper, we propose Large Margin Motif Optimizer (LMMO), a large-margin-type algorithm for refining regulatory motifs. By relaxing the AUC cost function with the surrogate convex hinge loss, we show that the resultant learning problem can be cast as an instance of difference-of-convex (DC) programs, and solve it iteratively using constrained concave-convex procedure (CCCP). To further save computational time, we combine LMMO with existing techniques for improving the scalability of large-margin-type algorithms, such as cutting plane method. Experimental evaluations on synthetic and real data illustrate the performance of the proposed approach. The code of LMMO is freely available at: https://github.com/ekffar/LMMO.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Alves:2018:MSE, author = "Pedro Alves and Shuang Liu and Daifeng Wang and Mark Gerstein", title = "Multiple-Swarm Ensembles: Improving the Predictive Power and Robustness of Predictive Models and Its Use in Computational Biology", journal = j-TCBB, volume = "15", number = "3", pages = "926--933", month = may, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2691329", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jun 30 09:34:37 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Machine learning is an integral part of computational biology, and has already shown its use in various applications, such as prognostic tests. 
In the last few years in the non-biological machine learning community, ensembling techniques have shown their power in data mining competitions such as the Netflix challenge; however, such methods have not found wide use in computational biology. In this work, we endeavor to show how ensembling techniques can be applied to practical problems, including problems in the field of bioinformatics, and how they often outperform other machine learning techniques in both predictive power and robustness. Furthermore, we develop a methodology of ensembling, Multi-Swarm Ensemble (MSWE) by using multiple particle swarm optimizations and demonstrate its ability to further enhance the performance of ensembles.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Fotoohifiroozabadi:2018:NFN, author = "Samira Fotoohifiroozabadi and Mohd Saberi Mohamad and Safaai Deris", title = "{NAHAL-Flex}: a Numerical and Alphabetical Hinge Detection Algorithm for Flexible Protein Structure Alignment", journal = j-TCBB, volume = "15", number = "3", pages = "934--943", month = may, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2705080", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jun 30 09:34:37 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Flexible proteins are proteins that have conformational changes in their structures. Protein flexibility analysis is critical for classifying and understanding protein functionality. For that analysis, the hinge areas where proteins show flexibility must be detected. To detect the location of the hinges, previous methods have utilized the three-dimensional (3D) structure of proteins, which is highly computational. 
To reduce the computational complexity, this study proposes a novel text-based method using structural alphabets (SAs) for detecting the hinge position, called NAHAL-Flex. Protein structures were encoded to a particular type of SA called the protein folding shape code (PFSC), which remains unaffected by location, scale, and rotation. The flexible regions of the proteins are the only places in which letter sequences can be distorted. With this knowledge, it is possible to find the longest alignment path of two letter sequences using a dynamic programming (DP) algorithm. Then, the proposed method looks for regions where the alphabet sequence is distorted to find the most probable hinge positions. In order to reduce the number of hinge positions, a genetic algorithm (GA) was utilized to find the best candidate hinge points. To evaluate the method's effectiveness, four different flexible and rigid protein databases, including two small datasets and two large datasets, were utilized. For the small dataset, the NAHAL-Flex method was comparable to state-of-the-art structural flexible alignment methods. 
The result for the large datasets show that NAHAL-Flex outperforms some well-known alignment methods, e.g., DaliLite, Matt, DeepAlign, and TM-align; the speed of NAHAL-Flex was faster and its result was more accurate than the other methods.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Min:2018:NRS, author = "Wenwen Min and Juan Liu and Shihua Zhang", title = "Network-Regularized Sparse Logistic Regression Models for Clinical Risk Prediction and Biomarker Discovery", journal = j-TCBB, volume = "15", number = "3", pages = "944--953", month = may, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2640303", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jun 30 09:34:37 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Molecular profiling data (e.g., gene expression) has been used for clinical risk prediction and biomarker discovery. However, it is necessary to integrate other prior knowledge like biological pathways or gene interaction networks to improve the predictive ability and biological interpretability of biomarkers. Here, we first introduce a general regularized Logistic Regression (LR) framework with regularized term $ \lambda \Vert \boldsymbol {w} \Vert_1 + \eta \boldsymbol {w}^T \boldsymbol {M} \boldsymbol {w} $, which can reduce to different penalties, including Lasso, elastic net, and network-regularized terms with different $ \boldsymbol {M} $. This framework can be easily solved in a unified manner by a cyclic coordinate descent algorithm which can avoid inverse matrix operation and accelerate the computing speed. However, if those estimated $ \boldsymbol {w}_i $ and $ \boldsymbol {w}_j $ have opposite signs, then the traditional network-regularized penalty may not perform well. 
To address it, we introduce a novel network-regularized sparse LR model with a new penalty $ \lambda \Vert \boldsymbol {w} \Vert_1 + \eta | \boldsymbol {w}|^T \boldsymbol {M}| \boldsymbol {w}| $ to consider the difference between the absolute values of the coefficients. We develop two efficient algorithms to solve it. Finally, we test our methods and compare them with the related ones using simulated and real data to show their efficiency.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Chicco:2018:NIE, author = "Davide Chicco and Fernando Palluzzi and Marco Masseroli", title = "Novelty Indicator for Enhanced Prioritization of Predicted {Gene Ontology} Annotations", journal = j-TCBB, volume = "15", number = "3", pages = "954--965", month = may, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2695459", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jun 30 09:34:37 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Biomolecular controlled annotations have become pivotal in computational biology, because they allow scientists to analyze large amounts of biological data to better understand test results, and to infer new knowledge. Yet, biomolecular annotation databases are incomplete by definition, like our knowledge of biology, and might contain errors and inconsistent information. In this context, machine-learning algorithms able to predict and prioritize new annotations are both effective and efficient, especially if compared with time-consuming trials of biological validation. 
To limit the possibility that these techniques predict obvious and trivial high-level features, and to help prioritize their results, we introduce a new element that can improve accuracy and relevance of the results of an annotation prediction and prioritization pipeline. We propose a novelty indicator able to state the level of ``originality'' of the annotations predicted for a specific gene to Gene Ontology (GO) terms. This indicator, joint with our previously introduced prediction steps, helps by prioritizing the most novel interesting annotations predicted. We performed an accurate biological functional analysis of the prioritized annotations predicted with high accuracy by our indicator and previously proposed methods. The relevance of our biological findings proves effectiveness and trustworthiness of our indicator and of its prioritization of predicted annotations.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Khalid:2018:PHD, author = "Zoya Khalid and Osman Ugur Sezerman", title = "Prediction of {HIV} Drug Resistance by Combining Sequence and Structural Properties", journal = j-TCBB, volume = "15", number = "3", pages = "966--973", month = may, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2638821", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jun 30 09:34:37 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Drug resistance is a major obstacle faced by therapist in treating HIV infected patients. The reason behind these phenomena is either protein mutation or the changes in gene expression level that induces resistance to drug treatments. These mutations affect the drug binding activity, hence resulting in failure of treatment. 
Therefore, it is necessary to conduct resistance testing in order to carry out HIV effective therapy. This study combines both sequence and structural features for predicting HIV resistance by applying SVM and Random Forests classifiers. The model was tested on the mutants of HIV-1 protease and reverse transcriptase. Taken together the features we have used in our method, total contact energies among multiple mutations have a strong impact in predicting resistance as they are crucial in understanding the interactions of HIV mutants. The combination of sequence-structure features offers high accuracy with support vector machines as compared to Random Forests classifier. Both single and acquisition of multiple mutations are important in predicting HIV resistance to certain drug treatments. We have discovered the practicality of these features; hence, these can be used in the future to predict resistance for other complex diseases.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Liu:2018:RNN, author = "Jin-Xing Liu and Dong Wang and Ying-Lian Gao and Chun-Hou Zheng and Yong Xu and Jiguo Yu", title = "Regularized Non-Negative Matrix Factorization for Identifying Differentially Expressed Genes and Clustering Samples: a Survey", journal = j-TCBB, volume = "15", number = "3", pages = "974--987", month = may, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2665557", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jun 30 09:34:37 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Non-negative Matrix Factorization (NMF), a classical method for dimensionality reduction, has been applied in many fields. It is based on the idea that negative numbers are physically meaningless in various data-processing tasks. 
Apart from its contribution to conventional data analysis, the recent overwhelming interest in NMF is due to its newly discovered ability to solve challenging data mining and machine learning problems, especially in relation to gene expression data. This survey paper mainly focuses on research examining the application of NMF to identify differentially expressed genes and to cluster samples, and the main NMF models, properties, principles, and algorithms with its various generalizations, extensions, and modifications are summarized. The experimental results demonstrate the performance of the various NMF algorithms in identifying differentially expressed genes and clustering samples.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhang:2018:DMD, author = "Junhua Zhang and Shihua Zhang", title = "The Discovery of Mutated Driver Pathways in Cancer: Models and Algorithms", journal = j-TCBB, volume = "15", number = "3", pages = "988--998", month = may, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2640963", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jun 30 09:34:37 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The pathogenesis of cancer in human is still poorly understood. With the rapid development of high-throughput sequencing technologies, huge volumes of cancer genomics data have been generated. Deciphering that data poses great opportunities and challenges to computational biologists. One of such key challenges is to distinguish driver mutations, genes as well as pathways from passenger ones. Mutual exclusivity of gene mutations (each patient has no more than one mutation in the gene set) has been observed in various cancer types and thus has been used as an important property of a driver gene set or pathway. 
In this article, we aim to review the recent development of computational models and algorithms for discovering driver pathways or modules in cancer with the focus on mutual exclusivity-based ones.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wang:2018:CDB, author = "Bin Wang and Xuedong Zheng and Shihua Zhou and Changjun Zhou and Xiaopeng Wei and Qiang Zhang and Ziqi Wei", title = "Constructing {DNA} Barcode Sets Based on Particle Swarm Optimization", journal = j-TCBB, volume = "15", number = "3", pages = "999--1002", month = may, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2679004", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jun 30 09:34:37 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Following the completion of the human genome project, a large amount of high-throughput bio-data was generated. To analyze these data, massively parallel sequencing, namely next-generation sequencing, was rapidly developed. DNA barcodes are used to identify the ownership between sequences and samples when they are attached at the beginning or end of sequencing reads. Constructing DNA barcode sets provides the candidate DNA barcodes for this application. To increase the accuracy of DNA barcode sets, a particle swarm optimization (PSO) algorithm has been modified and used to construct the DNA barcode sets in this paper. Compared with the extant results, some lower bounds of DNA barcode sets are improved. 
The results show that the proposed algorithm is effective in constructing DNA barcode sets.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Poirier:2018:DAB, author = "Carl Poirier and Benoit Gosselin and Paul Fortier", title = "{DNA} Assembly with {de Bruijn} Graphs Using an {FPGA} Platform", journal = j-TCBB, volume = "15", number = "3", pages = "1003--1009", month = may, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2696522", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jun 30 09:34:37 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This paper presents an FPGA implementation of a DNA assembly algorithm, called Ray, initially developed to run on parallel CPUs. The OpenCL language is used and the focus is placed on modifying and optimizing the original algorithm to better suit the new parallelization tool and the radically different hardware architecture. 
The results show that the execution time is roughly one fourth that of the CPU and factoring energy consumption yields a tenfold savings.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Davidson:2018:EQR, author = "Ruth Davidson and MaLyn Lawhorn and Joseph Rusinko and Noah Weber", title = "Efficient Quartet Representations of Trees and Applications to Supertree and Summary Methods", journal = j-TCBB, volume = "15", number = "3", pages = "1010--1015", month = may, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2638911", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jun 30 09:34:37 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Quartet trees displayed by larger phylogenetic trees have long been used as inputs for species tree and supertree reconstruction. Computational constraints prevent the use of all displayed quartets in many practical problems with large numbers of taxa. We introduce the notion of an Efficient Quartet System (EQS) to represent a phylogenetic tree with a subset of the quartets displayed by the tree. We show mathematically that the set of quartets obtained from a tree via an EQS contains all of the combinatorial information of the tree itself. 
Using performance tests on simulated datasets, we also demonstrate that using an EQS to reduce the number of quartets in both summary method pipelines for species tree inference as well as methods for supertree inference results in only small reductions in accuracy.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Liang:2018:OGS, author = "Xianpeng Liang and Lin Zhu and De-Shuang Huang", title = "Optimization of Gene Set Annotations Using Robust Trace-Norm Multitask Learning", journal = j-TCBB, volume = "15", number = "3", pages = "1016--1021", month = may, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2690427", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jun 30 09:34:37 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Gene set enrichment (GSE) is a useful tool for analyzing and interpreting large molecular datasets generated by modern biomedical science. The accuracy and reproducibility of GSE analysis are heavily affected by the quality and integrity of gene sets annotations. In this paper, we propose a novel method, robust trace-norm multitask learning, to solve the optimization problem of gene set annotations. Inspired by the binary nature of annotations, we convert the optimization of gene set annotations into a weakly supervised classification problem and use discriminative logistic regression to fit these datasets. Then, the output of logistic regression can be used to measure the probability of the existence of annotations. In addition, the optimization of each row of the annotation matrix can be treated as an independent weakly classification task, and we use the multitask learning approach with trace-norm regularization to optimize all rows of annotation matrix simultaneously. 
Finally, the experiments on simulated and real data demonstrate the effectiveness and good performance of the proposed method.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Liu:2018:PTB, author = "Chengyu Liu and Rainer Lehtonen and Sampsa Hautaniemi", title = "{PerPAS}: Topology-Based Single Sample Pathway Analysis Method", journal = j-TCBB, volume = "15", number = "3", pages = "1022--1027", month = may, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2679745", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jun 30 09:34:37 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Identification of intracellular pathways that play key roles in cancer progression and drug resistance is a prerequisite for developing targeted cancer treatments. The era of personalized medicine calls for computational methods that can function with one sample or a very small set of samples. Developing such methods is challenging because standard statistical approaches pose several limiting assumptions, such as number of samples, that prevent their application when $n$ approaches to one. We have developed a novel pathway analysis method called PerPAS to estimate pathway activity at a single sample level by integrating pathway topology and transcriptomics data. In addition, PerPAS is able to identify altered pathways between cancer and control samples as well as to identify key nodes that contribute to the pathway activity. In our case study using breast cancer data, we show that PerPAS can identify highly altered pathways that are associated with patient survival. PerPAS identified four pathways that were associated with patient survival and were successfully validated in three independent breast cancer cohorts. 
In comparison to two other pathway analysis methods that function at a single sample level, PerPAS had superior performance in both synthetic and breast cancer expression datasets. PerPAS is a free R package http://csbi.ltdk.helsinki.fi/pub/czliu/perpas/.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Torshizi:2018:SPI, author = "Abolfazl Doostparast Torshizi and Linda Petzold", title = "Sparse Pathway-Induced Dynamic Network Biomarker Discovery for Early Warning Signal Detection in Complex Diseases", journal = j-TCBB, volume = "15", number = "3", pages = "1028--1034", month = may, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2687925", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jun 30 09:34:37 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In many complex diseases, the transition process from the healthy stage to the catastrophic stage does not occur gradually. Recent studies indicate that the initiation and progression of such diseases are comprised of three steps including healthy stage, pre-disease stage, and disease stage. It has been demonstrated that a certain set of trajectories can be observed in the genetic signatures at the molecular level, which might be used to detect the pre-disease stage and to take necessary medical interventions. In this paper, we propose two optimization-based algorithms for extracting the dynamic network biomarkers responsible for catastrophic transition into the disease stage, and to open new horizons to reverse the disease progression at an early stage through pinpointing molecular signatures provided by high-throughput microarray data. The first algorithm relies on meta-heuristic intelligent search to characterize dynamic network biomarkers represented as a complete graph. 
The second algorithm induces sparsity on the adjacency matrix of the genes by taking into account the biological signaling and metabolic pathways, since not all the genes in the interactome are biologically linked. Comprehensive numerical and meta-analytical experiments verify the effectiveness of the results of the proposed approaches in terms of network size, biological meaningfulness, and verifiability.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhang:2018:CDE, author = "Wei Zhang and Jia Xu and Yuanyuan Li and Xiufen Zou", title = "Correction to {``Detecting Essential Proteins Based on Network Topology, Gene Expression Data, and Gene Ontology Information''}", journal = j-TCBB, volume = "15", number = "3", pages = "1035--1035", month = may, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2813918", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jun 30 09:34:37 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", note = "See \cite{Zhang:2018:DEP}.", abstract = "Presents corrections to author information for the paper, W. Zhang, J. Xu, Y. Li, and X. Zou, ``Detecting essential proteins based on network topology, gene expression data, and gene ontology information,'', IEEE/ACM Trans. Comput. Biol. Bioinf., vol. 15, no. 1, pp. 109--116, Jan./Feb. 2018.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Murali:2018:GE, author = "T. M.
Murali", title = "Guest Editorial", journal = j-TCBB, volume = "15", number = "4", pages = "1036--1036", month = jul, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2856658", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:45 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ghoshal:2018:DCM, author = "Asish Ghoshal and Jinyi Zhang and Michael A. Roth and Kevin Muyuan Xia and Ananth Y. Grama and Somali Chaterji", title = "A Distributed Classifier for {MicroRNA} Target Prediction with Validation Through {TCGA} Expression Data", journal = j-TCBB, volume = "15", number = "4", pages = "1037--1051", month = jul, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2828305", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:45 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Background: MicroRNAs miRNAs are approximately 22-nucleotide long regulatory RNA that mediate RNA interference by binding to cognate mRNA target regions. Here, we present a distributed kernel SVM-based binary classification scheme to predict miRNA targets. It captures the spatial profile of miRNA-mRNA interactions via smooth B-spline curves. This is accomplished separately for various input features, such as thermodynamic and sequence-based features. Further, we use a principled approach to uniformly model both canonical and non-canonical seed matches, using a novel seed enrichment metric. Finally, we verify our miRNA-mRNA pairings using an Elastic Net-based regression model on TCGA expression data for four cancer types to estimate the miRNAs that together regulate any given mRNA. 
Results: We present a suite of algorithms for miRNA target prediction, under the banner Avishkar, with superior prediction performance over the competition. Specifically, our final kernel SVM model, with an Apache Spark backend, achieves an average true positive rate TPR of more than 75 percent, when keeping the false positive rate of 20 percent, for non-canonical human miRNA target sites. This is an improvement of over 150 percent in the TPR for non-canonical sites, over the best-in-class algorithm. We are able to achieve such superior performance by representing the thermodynamic and sequence profiles of miRNA-mRNA interaction as curves, devising a novel seed enrichment metric, and learning an ensemble of miRNA family-specific kernel SVM classifiers. We provide an easy-to-use system for large-scale interactive analysis and prediction of miRNA targets. All operations in our system, namely candidate set generation, feature generation and transformation, training, prediction, and computing performance metrics are fully distributed and are scalable. Conclusions: We have developed an efficient SVM-based model for miRNA target prediction using recent CLIP-seq data, demonstrating superior performance, evaluated using ROC curves for different species human or mouse, or different target types canonical or non-canonical. We analyzed the agreement between the target pairings using CLIP-seq data and using expression data from four cancer types. To the best of our knowledge, we provide the first distributed framework for miRNA target prediction based on Apache Hadoop and Spark. Availability: All source code and sample data are publicly available at https://bitbucket.org/cellsandmachines/avishkar. 
Our scalable implementation of kernel SVM using Apache Spark, which can be used to solve large-scale non-linear binary classification problems, is available at https://bitbucket.org/cellsandmachines/kernelsvmspark.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Qiao:2018:QDA, author = "Shi Qiao and Mehmet Koyut{\"u}rk and Meral Z. {\"O}zsoyo{\u{g}}lu", title = "Querying of Disparate Association and Interaction Data in Biomedical Applications", journal = j-TCBB, volume = "15", number = "4", pages = "1052--1065", month = jul, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2637344", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:45 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In biomedical applications, network models are commonly used to represent interactions and higher-level associations among biological entities. Integrated analyses of these interaction and association data has proven useful in extracting knowledge, and generating novel hypotheses for biomedical research. However, since most datasets provide their own schema and query interface, opportunities for exploratory and integrative querying of disparate data are currently limited. In this study, we utilize RDF-based representations of biomedical interaction and association data to develop a querying framework that enables flexible specification and efficient processing of graph template matching queries. The proposed framework enables integrative querying of biomedical databases to discover complex patterns of associations among a diverse range of biological entities, including biomolecules, biological processes, organisms, and phenotypes. 
Our experimental results on the UniProt dataset show that the proposed framework can be used to efficiently process complex queries, and identify biologically relevant patterns of associations that cannot be readily obtained by querying each dataset independently.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Gossmann:2018:SRM, author = "Alexej Gossmann and Shaolong Cao and Damian Brzyski and Lan-Juan Zhao and Hong-Wen Deng and Yu-Ping Wang", title = "A Sparse Regression Method for Group-Wise Feature Selection with False Discovery Rate Control", journal = j-TCBB, volume = "15", number = "4", pages = "1066--1078", month = jul, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2780106", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:45 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/s-plus.bib; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The method of Sorted L-One Penalized Estimation, or SLOPE, is a sparse regression method recently introduced by Bogdan et al. [1]. It can be used to identify significant predictor variables in a linear model that may have more unknown parameters than observations. When the correlations between predictor variables are small, the SLOPE method is shown to successfully control the false discovery rate (the expected proportion of the irrelevant among all selected predictors) at a user specified level. However, the requirement for nearly uncorrelated predictors is too restrictive for genomic data, as demonstrated in our recent study [2] by an application of SLOPE to realistic simulated DNA sequence data.
A possible solution is to divide the predictor variables into nearly uncorrelated groups, and to modify the procedure to select entire groups with an overall significant group effect, rather than individual predictors. Following this motivation, we extend SLOPE in the spirit of Group LASSO to Group SLOPE, a method that can handle group structures between the predictor variables, which are ubiquitous in real genomic data. Our theoretical results show that Group SLOPE controls the group-wise false discovery rate gFDR, when groups are orthogonal to each other. For use in non-orthogonal settings, we propose two types of Monte Carlo based heuristics, which lead to gFDR control with Group SLOPE in simulations based on real SNP data. As an illustration of the merits of this method, an application of Group SLOPE to a dataset from the Framingham Heart Study results in the identification of some known DNA sequence regions associated with bone health, as well as some new candidate regions. The novel methods are implemented in the R package grpSLOPEMC, which is publicly available at https://github.com/agisga/grpSLOPEMC.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Alim:2018:CSP, author = "Md Abdul Alim and Ahmet Ay and Md Mahmudul Hasan and My T. Thai and Tamer Kahveci", title = "Construction of Signaling Pathways with {RNAi} Data and Multiple Reference Networks", journal = j-TCBB, volume = "15", number = "4", pages = "1079--1091", month = jul, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2710129", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:45 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Signaling networks are involved in almost all major diseases such as cancer. 
As a result of this, understanding how signaling networks function is vital for finding new treatments for many diseases. Using gene knockdown assays such as RNA interference RNAi technology, many genes involved in these networks can be identified. However, determining the interactions between these genes in the signaling networks using only experimental techniques is very challenging, as performing extensive experiments is very expensive and sometimes, even impractical. Construction of signaling networks from RNAi data using computational techniques have been proposed as an alternative way to solve this challenging problem. However, the earlier approaches are either not scalable to large scale networks, or their accuracy levels are not satisfactory. In this study, we integrate RNAi data given on a target network with multiple reference signaling networks and phylogenetic trees to construct the topology of the target signaling network. In our work, the network construction is considered as finding the minimum number of edit operations on given multiple reference networks, in which their contributions are weighted by their phylogenetic distances to the target network. The edit operations on the reference networks lead to a target network that satisfies the RNAi knockdown observations. Here, we propose two new reference-based signaling network construction methods that provide optimal results and scale well to large-scale signaling networks of hundreds of components. We compare the performance of these approaches to the state-of-the-art reference-based network construction method SiNeC on synthetic, semi-synthetic, and real datasets. Our analyses show that the proposed methods outperform SiNeC method in terms of accuracy. Furthermore, we show that our methods function well even if evolutionarily distant reference networks are used. 
Application of our methods to the Apoptosis and Wnt signaling pathways recovers the known protein-protein interactions and suggests additional relevant interactions that can be tested experimentally.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Fan:2018:GES, author = "Xiaodan Fan and Xinglai Ji and Rui Jiang", title = "Guest Editorial for Special Section on the {Sixth National Conference on Bioinformatics and System Biology of China}", journal = j-TCBB, volume = "15", number = "4", pages = "1092--1092", month = jul, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2838498", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:45 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zamanighomi:2018:GRN, author = "Mahdi Zamanighomi and Mostafa Zamanian and Michael Kimber and Zhengdao Wang", title = "Gene Regulatory Network Inference from Perturbed Time-Series Expression Data via Ordered Dynamical Expansion of Non-Steady State Actors", journal = j-TCBB, volume = "15", number = "4", pages = "1093--1106", month = jul, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2509992", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:45 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The reconstruction of gene regulatory networks from gene expression data has been the subject of intense research activity. A variety of models and methods have been developed to address different aspects of this important problem. 
However, these techniques are narrowly focused on particular biological and experimental platforms, and require experimental data that are typically unavailable and difficult to ascertain. The more recent availability of higher-throughput sequencing platforms, combined with more precise modes of genetic perturbation, presents an opportunity to formulate more robust and comprehensive approaches to gene network inference. Here, we propose a step-wise framework for identifying gene-gene regulatory interactions that expand from a known point of genetic or chemical perturbation using time series gene expression data. This novel approach sequentially identifies non-steady state genes post-perturbation and incorporates them into a growing series of low-complexity optimization problems. The governing ordinary differential equations of this model are rooted in the biophysics of stochastic molecular events that underlie gene regulation, delineating roles for both protein and RNA-mediated gene regulation. We show the successful application of our core algorithms for network inference using simulated and real datasets.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Liu:2018:PWT, author = "Ke Liu and Sha Hou and Junbiao Dai and Zhirong Sun", title = "{PyMut}: a {Web} Tool for Overlapping Gene Loss-of-Function Mutation Design", journal = j-TCBB, volume = "15", number = "4", pages = "1107--1110", month = jul, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2505290", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:45 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Loss-of-function study is an effective approach to research gene functions. 
However, currently most of such studies have ignored an important problem (in this paper, we call it ``off-target'' problem), that is, if the target gene is an overlapping gene (A gene whose expressible nucleotides overlaps with that of another one), loss-of-function mutation by deleting the complete open reading frame (ORF) may also cause the gene it overlaps lose function, resulting a phenotype which may be rather different from that of single gene deletion. Therefore, when doing such studies, the loss-of-function mutations should be carefully designed to guarantee only the function of the target gene will be abolished. In this paper, we present PyMut, an easy-to-use web tool for biologists to design such mutations. To the best of our knowledge, PyMut is the first tool that aims to solve the ``off-target'' problem regarding the overlapping genes. Our web server is freely available at http://www.bioinfo.tsinghua.edu.cn/~liuke/PyMut/index.html.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Cheng:2018:NPI, author = "Xiangfei Cheng and Yue Hou and Yumin Nie and Yiru Zhang and Huan Huang and Hongde Liu and Xiao Sun", title = "Nucleosome Positioning of Intronless Genes in the Human Genome", journal = j-TCBB, volume = "15", number = "4", pages = "1111--1121", month = jul, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2476811", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:45 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Nucleosomes, the basic units of chromatin, are involved in transcription regulation and DNA replication. Intronless genes, which constitute 3 percent of the human genome, differ from intron-containing genes in evolution and function.
Our analysis reveals that nucleosome positioning shows a distinct pattern in intronless and intron-containing genes. The nucleosome occupancy upstream of transcription start sites of intronless genes is lower than that of intron-containing genes. In contrast, high occupancy and well positioned nucleosomes are observed along the gene body of intronless genes, which is perfectly consistent with the barrier nucleosome model. Intronless genes have a significantly lower expression level than intron-containing genes and most of them are not expressed in CD4+ T cell lines and GM12878 cell lines, which results from their tissue specificity. However, the highly expressed genes are at the same expression level between the two types of genes. The highly expressed intronless genes require a higher density of RNA Pol II in an elongating state to compensate for the lack of introns. Additionally, 5' and 3' nucleosome depleted regions of highly expressed intronless genes are deeper than those of highly expressed intron-containing genes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Bartocci:2018:GEI, author = "Ezio Bartocci and Pietro Lio and Nicola Paoletti", title = "Guest Editors' Introduction to the Special Section on the {14th International Conference on Computational Methods in Systems Biology CMSB 2016}", journal = j-TCBB, volume = "15", number = "4", pages = "1122--1123", month = jul, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2816979", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:45 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Feret:2018:LTA, 
author = "Jerome Feret and Kim Quyen Ly", title = "Local Traces: an Over-Approximation of the Behavior of the Proteins in Rule-Based Models", journal = j-TCBB, volume = "15", number = "4", pages = "1124--1137", month = jul, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2812195", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:45 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Thanks to rule-based modelling languages, we can assemble large sets of mechanistic protein-protein interactions within integrated models. Our goal would be to understand how the behavior of these systems emerges from these low-level interactions. Yet, this is a quite long term challenge and it is desirable to offer intermediary levels of abstraction, so as to get a better understanding of the models and to increase our confidence within our mechanistic assumptions. To this extent, static analysis can be used to derive various abstractions of the semantics, each of them offering new perspectives on the models. We propose an abstract interpretation of the behavior of each protein, in isolation. Given a model written in Kappa, this abstraction computes for each kind of proteins a transition system that describes which conformations this protein may take and how a protein may pass from one conformation to another one. Then, we use simplicial complexes to abstract away the interleaving order of the transformations between conformations that commute. As a result, we get a compact summary of the potential behavior of each protein of the model.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Fages:2018:INC, author = "Fran{\c{c}}ois Fages and Thierry Martinez and David A.
Rosenblueth and Sylvain Soliman", title = "Influence Networks Compared with Reaction Networks: Semantics, Expressivity and Attractors", journal = j-TCBB, volume = "15", number = "4", pages = "1138--1151", month = jul, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2805686", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:45 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Biochemical reaction networks are one of the most widely used formalisms in systems biology to describe the molecular mechanisms of high-level cell processes. However, modellers also reason with influence diagrams to represent the positive and negative influences between molecular species and may find an influence network useful in the process of building a reaction network. In this paper, we introduce a formalism of influence networks with forces, and equip it with a hierarchy of Boolean, Petri net, stochastic and differential semantics, similarly to reaction networks with rates. We show that the expressive power of influence networks is the same as that of reaction networks under the differential semantics, but weaker under the discrete semantics. Furthermore, the hierarchy of semantics leads us to consider a positive Boolean semantics that cannot test the absence of a species, that we compare with the negative Boolean semantics with test for absence of a species in gene regulatory networks {\`a} la Thomas. We study the monotonicity properties of the positive semantics and derive from them an algorithm to compute attractors in both the positive and negative Boolean semantics. 
We illustrate our results on models of the literature about the p53/Mdm2 DNA damage repair system, the circadian clock, and the influence of MAPK signaling on cell-fate decision in urinary bladder cancer.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Mu:2018:OOC, author = "Chunyan Mu and Peter Dittrich and David Parker and Jonathan E. Rowe", title = "Organisation-Oriented Coarse Graining and Refinement of Stochastic Reaction Networks", journal = j-TCBB, volume = "15", number = "4", pages = "1152--1166", month = jul, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2804395", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:45 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Chemical organisation theory is a framework developed to simplify the analysis of long-term behaviour of chemical systems. In this work, we build on these ideas to develop novel techniques for formal quantitative analysis of chemical reaction networks, using discrete stochastic models represented as continuous-time Markov chains. We propose methods to identify organisations, and to study quantitative properties regarding movements between these organisations. We then construct and formalise a coarse-grained Markov chain model of hierarchic organisations for a given reaction network, which can be used to approximate the behaviour of the original reaction network. As an application of the coarse-grained model, we predict the behaviour of the reaction network systems over time via the master equation. Experiments show that our predictions can mimic the main pattern of the concrete behaviour in the long run, but the precision varies for different models and reaction rule rates. 
Finally, we propose an algorithm to selectively refine the coarse-grained models and show experiments demonstrating that the precision of the prediction has been improved.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Pauleve:2018:RQM, author = "Loic Pauleve", title = "Reduction of Qualitative Models of Biological Networks for Transient Dynamics Analysis", journal = j-TCBB, volume = "15", number = "4", pages = "1167--1179", month = jul, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2749225", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:45 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Qualitative models of dynamics of signalling pathways and gene regulatory networks allow for the capturing of temporal properties of biological networks while requiring few parameters. However, these discrete models typically suffer from the so-called state space explosion problem which makes the formal assessment of their potential behaviors very challenging. In this paper, we describe a method to reduce a qualitative model for enhancing the tractability of analysis of transient reachability properties. The reduction does not change the dimension of the model, but instead limits its degree of freedom, therefore reducing the set of states and transitions to consider. We rely on a transition-centered specification of qualitative models by the mean of automata networks. Our framework encompasses the usual asynchronous Boolean and multi-valued network, as well as 1-bounded Petri nets. 
Applied to different large-scale biological networks from the literature, we show that the reduction can lead to a drastic improvement for the scalability of verification methods.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Backenkohler:2018:MBP, author = "Michael Backenkohler and Luca Bortolussi and Verena Wolf", title = "Moment-Based Parameter Estimation for Stochastic Reaction Networks in Equilibrium", journal = j-TCBB, volume = "15", number = "4", pages = "1180--1192", month = jul, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2775219", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:45 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Calibrating parameters is a crucial problem within quantitative modeling approaches to reaction networks. Existing methods for stochastic models rely either on statistical sampling or can only be applied to small systems. Here, we present an inference procedure for stochastic models in equilibrium that is based on a moment matching scheme with optimal weighting and that can be used with high-throughput data like the one collected by flow cytometry. Our method does not require an approximation of the underlying equilibrium probability distribution and, if reaction rate constants have to be learned, the optimal values can be computed by solving a linear system of equations. We discuss important practical issues such as the selection of the moments and evaluate the effectiveness of the proposed approach on three case studies.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Villaverde:2018:PTI, author = "Alejandro F. 
Villaverde and Kolja Becker and Julio R. Banga", title = "{PREMER}: a Tool to Infer Biological Networks", journal = j-TCBB, volume = "15", number = "4", pages = "1193--1202", month = jul, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2758786", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:45 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/fortran3.bib; https://www.math.utah.edu/pub/tex/bib/matlab.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/python.bib; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Inferring the structure of unknown cellular networks is a main challenge in computational biology. Data-driven approaches based on information theory can determine the existence of interactions among network nodes automatically. However, the elucidation of certain features---such as distinguishing between direct and indirect interactions or determining the direction of a causal link---requires estimating information-theoretic quantities in a multidimensional space. This can be a computationally demanding task, which acts as a bottleneck for the application of elaborate algorithms to large-scale network inference problems. The computational cost of such calculations can be alleviated by the use of compiled programs and parallelization. To this end, we have developed PREMER (Parallel Reverse Engineering with Mutual information \& Entropy Reduction), a software toolbox that can run in parallel and sequential environments. It uses information theoretic criteria to recover network topology and determine the strength and causality of interactions, and allows incorporating prior knowledge, imputing missing data, and correcting outliers. PREMER is a free, open source software tool that does not require any commercial software. Its core algorithms are programmed in FORTRAN 90 and implement OpenMP directives. 
It has user interfaces in Python and MATLAB/Octave, and runs on Windows, Linux, and OSX (https://sites.google.com/site/premertoolbox/).", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Mizera:2018:APT, author = "Andrzej Mizera and Jun Pang and Cui Su and Qixia Yuan", title = "{{\sf ASSA-PBN}}: a Toolbox for Probabilistic {Boolean} Networks", journal = j-TCBB, volume = "15", number = "4", pages = "1203--1216", month = jul, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2773477", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:45 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "As a well-established computational framework, probabilistic Boolean networks (PBNs) are widely used for modelling, simulation, and analysis of biological systems. To analyze the steady-state dynamics of PBNs is of crucial importance to explore the characteristics of biological systems. However, the analysis of large PBNs, which often arise in systems biology, is prone to the infamous state-space explosion problem. Therefore, the employment of statistical methods often remains the only feasible solution. We present $ \mathsf {ASSA - PBN} $, a software toolbox for modelling, simulation, and analysis of PBNs. $ \mathsf {ASSA - PBN} $ provides efficient statistical methods with three parallel techniques to speed up the computation of steady-state probabilities. Moreover, particle swarm optimisation (PSO) and differential evolution (DE) are implemented for the estimation of PBN parameters. Additionally, we implement in-depth analyses of PBNs, including long-run influence analysis, long-run sensitivity analysis, computation of one-parameter profile likelihoods, and the visualization of one-parameter profile likelihoods. 
A PBN model of apoptosis is used as a case study to illustrate the main functionalities of $ \mathsf {ASSA - PBN} $ and to demonstrate the capabilities of $ \mathsf {ASSA - PBN} $ to effectively analyse biological systems modelled as PBNs.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Czeizler:2018:STC, author = "Eugen Czeizler and Kai-Chiu Wu and Cristian Gratie and Krishna Kanhaiya and Ion Petre", title = "Structural Target Controllability of Linear Networks", journal = j-TCBB, volume = "15", number = "4", pages = "1217--1228", month = jul, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2797271", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:45 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Computational analysis of the structure of intra-cellular molecular interaction networks can suggest novel therapeutic approaches for systemic diseases like cancer. Recent research in the area of network science has shown that network control theory can be a powerful tool in the understanding and manipulation of such bio-medical networks. In 2011, Liu et al. developed a polynomial time algorithm computing the size of the minimal set of nodes controlling a linear network. In 2014, Gao et al. generalized the problem for target control, minimizing the set of nodes controlling a target within a linear network. The authors developed a Greedy approximation algorithm while leaving open the complexity of the optimization problem. We prove here that the target controllability problem is NP-hard in all practical setups, i.e., when the control power of any individual input is bounded by some constant. We also show that the algorithm provided by Gao et al. 
fails to provide a valid solution in some special cases, and an additional validation step is required. We fix and improve their algorithm using several heuristics, obtaining in the end an up to 10-fold decrease in running time and also a decrease in the size of solutions.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Milenkovic:2018:GLB, author = "Tijana Milenkovic and Sarath Chandra Janga", title = "{Great Lakes Bioinformatics Conference GLBIO 2015} Special Section Editorial", journal = j-TCBB, volume = "15", number = "4", pages = "1229--1230", month = jul, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2849800", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:45 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Presents information on the Great Lakes Bioinformatics Conference GLBIO.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Pease:2018:EDU, author = "James B. Pease and Benjamin K. Rosenzweig", title = "Encoding Data Using Biological Principles: The Multisample Variant Format for Phylogenomics and Population Genomics", journal = j-TCBB, volume = "15", number = "4", pages = "1231--1238", month = jul, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2509997", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:45 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Rapid progress in the fields of phylogenomics and population genomics has driven increases in both the size of multi-genomic datasets and the number and complexity of genome-wide analyses. 
We present the Multisample Variant Format, specifically designed to store multiple sequence alignments for phylogenomics and population genomic analysis. The signature feature of MVF is a distinctive encoding of aligned sites with specific biological information content (e.g., invariant, low-coverage). This biological pattern-based encoding of sequence data allows for rapid filtering and quality control of data and speeds up computation for many analyses. Similar to other modern formats, MVF has a simple data structure and flexible header structure to accommodate project metadata, allowing to also serve as an effective data publication and sharing format. We also propose several variants of the MVF format to accommodate protein and codon alignments, quality scores, and a mix of de novo and reference-aligned data. Using the MVFtools package, MVF files can be converted from other common sequence formats. MVFtools completes tasks ranging from simple transformation and filtering operations to complex genome-wide visualizations in only a few minutes, even on large datasets. In addition to presentation of MVF and MVFtools, we also discuss the application both in MVF and other existing data formats of the broader concept of using biological principles and patterns to inform sequence data encoding.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Roth:2018:TES, author = "Adam Roth and Sandeep Subramanian and Madhavi K. 
Ganapathiraju", title = "Towards Extracting Supporting Information About Predicted Protein-Protein Interactions", journal = j-TCBB, volume = "15", number = "4", pages = "1239--1246", month = jul, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2505278", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:45 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "One of the goals of relation extraction is to identify protein-protein interactions (PPIs) in biomedical literature. Current systems are capturing binary relations and also the direction and type of an interaction. Besides assisting in the curation PPIs into databases, there has been little real-world application of these algorithms. We describe UPSITE, a text mining tool for extracting evidence in support of a hypothesized interaction. Given a predicted PPI, UPSITE uses a binary relation detector to check whether a PPI is found in abstracts in PubMed. If it is not found, UPSITE retrieves documents relevant to each of the two proteins separately, and extracts contextual information about biological events surrounding each protein, and calculates semantic similarity of the two proteins to provide evidential support for the predicted PPI. In evaluations, relation extraction achieved an Fscore of 0.88 on the HPRD50 corpus, and semantic similarity measured with angular distance was found to be statistically significant. With the development of PPI prediction algorithms, the burden of interpreting the validity and relevance of novel PPIs is on biologists. 
We suggest that presenting annotations of the two proteins in a PPI side-by-side and a score that quantifies their similarity lessens this burden to some extent.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Yerneni:2018:IIS, author = "Satwica Yerneni and Ishita K. Khan and Qing Wei and Daisuke Kihara", title = "{IAS}: Interaction Specific {GO} Term Associations for Predicting Protein-Protein Interaction Networks", journal = j-TCBB, volume = "15", number = "4", pages = "1247--1258", month = jul, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2476809", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:45 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Proteins carry out their function in a cell through interactions with other proteins. A large scale protein-protein interaction (PPI) network of an organism provides static yet an essential structure of interactions, which is valuable clue for understanding the functions of proteins and pathways. PPIs are determined primarily by experimental methods; however, computational PPI prediction methods can supplement or verify PPIs identified by experiment. Here, we developed a novel scoring method for predicting PPIs from Gene Ontology (GO) annotations of proteins. Unlike existing methods that consider functional similarity as an indication of interaction between proteins, the new score, named the protein-protein Interaction Association Score (IAS), was computed from GO term associations of known interacting protein pairs in 49 organisms. IAS was evaluated on PPI data of six organisms and found to outperform existing GO term-based scoring methods. 
Moreover, consensus scoring methods that combine different scores further improved performance of PPI prediction.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Hashemikhabir:2018:FIG, author = "Seyedsasan Hashemikhabir and Ran Xia and Yang Xiang and Sarath Chandra Janga", title = "A Framework for Identifying Genotypic Information from Clinical Records: Exploiting Integrated Ontology Structures to Transfer Annotations between {ICD} Codes and Gene Ontologies", journal = j-TCBB, volume = "15", number = "4", pages = "1259--1269", month = jul, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2480056", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:45 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Although some methods are proposed for automatic ontology generation, none of them address the issue of integrating large-scale heterogeneous biomedical ontologies. We propose a novel approach for integrating various types of ontologies efficiently and apply it to integrate International Classification of Diseases, Ninth Revision, Clinical Modification (ICD9CM), and Gene Ontologies. This approach is one of the early attempts to quantify the associations among clinical terms (e.g., ICD9 codes) based on their corresponding genomic relationships. We reconstructed a merged tree for a partial set of GO and ICD9 codes and measured the performance of this tree in terms of associations' relevance by comparing them with two well-known disease-gene datasets (i.e., MalaCards and Disease Ontology). Furthermore, we compared the genomic-based ICD9 associations to temporal relationships between them from electronic health records. Our analysis shows promising associations supported by both comparisons suggesting a high reliability. 
We also manually analyzed several significant associations and found promising support from literature.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Patra:2018:RAP, author = "Pranjal Patra and Tatsuo Izawa and Lourdes Pena-Castillo", title = "{REPA}: Applying Pathway Analysis to Genome-Wide Transcription Factor Binding Data", journal = j-TCBB, volume = "15", number = "4", pages = "1270--1283", month = jul, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2453948", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:45 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Pathway analysis has been extensively applied to aid in the interpretation of the results of genome-wide transcription profiling studies, and has been shown to successfully find associations between the biological phenomena under study and biological pathways. There are two widely used approaches of pathway analysis: over-representation analysis, and gene set analysis. Recently genome-wide transcription factor binding data has become widely available allowing for the application of pathway analysis to this type of data. In this work, we developed regulatory enrichment pathway analysis (REPA) to apply gene set analysis to genome-wide transcription factor binding data to infer associations between transcription factors and biological pathways. We used the transcription factor binding data generated by the ENCODE project, and gene sets from the Molecular Signatures and KEGG databases. Our results showed that 54 percent of the predictions examined have literature support and that REPA's recall is roughly 54 percent. This level of precision is promising as several of REPA's predictions are expected to be novel and can be used to guide new research avenues. 
In addition, the results of our case studies showed that REPA enhances the interpretation of genome-wide transcription profiling studies by suggesting putative regulators behind the observed transcriptional responses.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ramalho:2018:PEF, author = "Rodrigo F. Ramalho and Sujun Li and Predrag Radivojac and Matthew W. Hahn", title = "Proteomic Evidence for In-Frame and Out-of-Frame Alternatively Spliced Isoforms in Human and Mouse", journal = j-TCBB, volume = "15", number = "4", pages = "1284--1289", month = jul, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2480068", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:45 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In order to find evidence for translation of alternatively spliced transcripts, especially those that result in a change in reading frame, we collected exon-skipping cases previously found by RNA-Seq and applied a computational approach to screen millions of mass spectra. These spectra came from seven human and six mouse tissues, five of which are the same between the two organisms: liver, kidney, lung, heart, and brain. Overall, we detected 4 percent of all exon-skipping events found in RNA-seq data, regardless of their effect on reading frame. The fraction of alternative isoforms detected did not differ between out-of-frame and in-frame events. Moreover, the fraction of identified alternative exon-exon junctions and constitutive junctions were similar. 
Together, our results suggest that both in-frame and out-of-frame translation may be actively used to regulate protein activity or localization.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Al-Ouran:2018:DGR, author = "Rami Al-Ouran and Robert Schmidt and Ashwini Naik and Jeffrey Jones and Frank Drews and David Juedes and Laura Elnitski and Lonnie Welch", title = "Discovering Gene Regulatory Elements Using Coverage-Based Heuristics", journal = j-TCBB, volume = "15", number = "4", pages = "1290--1300", month = jul, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2496261", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:45 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Data mining algorithms and sequencing methods such as RNA-seq and ChIP-seq are being combined to discover genomic regulatory motifs that relate to a variety of phenotypes. However, motif discovery algorithms often produce very long lists of putative transcription factor binding sites, hindering the discovery of phenotype-related regulatory elements by making it difficult to select a manageable set of candidate motifs for experimental validation. To address this issue, the authors introduce the motif selection problem and provide coverage-based search heuristics for its solution. Analysis of 203 ChIP-seq experiments from the ENCyclopedia of DNA Elements project shows that our algorithms produce motifs that have high sensitivity and specificity and reveals new insights about the regulatory code of the human genome. 
The greedy algorithm performs the best, selecting a median of two motifs per ChIP-seq transcription factor group while achieving a median sensitivity of 77 percent.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ironi:2018:MBT, author = "Liliana Ironi and Ettore Lanzarone", title = "A Model-Based Tool for the Analysis and Design of Gene Regulatory Networks", journal = j-TCBB, volume = "15", number = "4", pages = "1301--1314", month = jul, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2716942", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:45 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Computational and mathematical models have significantly contributed to the rapid progress in the study of gene regulatory networks (GRN), but researchers still lack a reliable model-based framework for computer-aided analysis and design. Such tool should both reveal the relation between network structure and dynamics and find parameter values and/or constraints that enable the simulated dynamics to reproduce specific behaviors. This paper addresses these issues and proposes a computational framework that facilitates network analysis or design. It follows a modeling cycle that alternates phases of hypothesis testing with parameter space refinement to ultimately propose a network that exhibits specified behaviors with the highest probability. Hypothesis testing is performed via qualitative simulation of GRNs modeled by a class of nonlinear and temporal multiscale ODEs, where regulation functions are expressed by steep sigmoid functions and incompletely known parameter values by order relations only. 
Parameter space refinement, grounded on a method that considers the intrinsic stochasticity of regulation by expressing network uncertainty with fluctuations in parameter values only, optimizes parameter stochastic values initialized by probability distributions with large variances. The power and ease of our framework is demonstrated by working out a benchmark synthetic network to get a synthetic oscillator.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Xia:2018:STS, author = "Chun-Qiu Xia and Ke Han and Yong Qi and Yang Zhang and Dong-Jun Yu", title = "A Self-Training Subspace Clustering Algorithm under Low-Rank Representation for Cancer Classification on Gene Expression Data", journal = j-TCBB, volume = "15", number = "4", pages = "1315--1324", month = jul, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2712607", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:45 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Accurate identification of the cancer types is essential to cancer diagnoses and treatments. Since cancer tissue and normal tissue have different gene expression, gene expression data can be used as an efficient feature source for cancer classification. However, accurate cancer classification directly using original gene expression profiles remains challenging due to the intrinsic high-dimension feature and the small size of the data samples. We proposed a new self-training subspace clustering algorithm under low-rank representation, called SSC-LRR, for cancer classification on gene expression data. 
Low-rank representation (LRR) is first applied to extract discriminative features from the high-dimensional gene expression data; the self-training subspace clustering (SSC) method is then used to generate the cancer classification predictions. The SSC-LRR was tested on two separate benchmark datasets in control with four state-of-the-art classification methods. It generated cancer classification predictions with an overall accuracy 89.7 percent and a general correlation 0.920, which are 18.9 and 24.4 percent higher than that of the best control method respectively. In addition, several genes (RNF114, HLA-DRB5, USP9Y, and PTPN20) were identified by SSC-LRR as new cancer identifiers that deserve further clinical investigation. Overall, the study demonstrated a new sensitive avenue to recognize cancer classifications from large-scale gene expression data.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{He:2018:TSB, author = "Xinyu He and Lishuang Li and Yang Liu and Xiaoming Yu and Jun Meng", title = "A Two-Stage Biomedical Event Trigger Detection Method Integrating Feature Selection and Word Embeddings", journal = j-TCBB, volume = "15", number = "4", pages = "1325--1332", month = jul, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2715016", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:45 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Extracting biomedical events from biomedical literature plays an important role in the field of biomedical text mining, and the trigger detection is a key step in biomedical event extraction. We propose a two-stage method for trigger detection, which divides trigger detection into recognition stage and classification stage, and different features are selected in each stage. 
In the first stage, we select the features which are more suitable for recognition, and in the second stage, the features that are more helpful to classification are adopted. Furthermore, we integrate word embeddings to represent words semantically and syntactically. On the multi-level event extraction (MLEE) corpus test dataset, our method achieves an F-score of 79.75 percent, which outperforms the state-of-the-art systems.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Deznabi:2018:IAG, author = "Iman Deznabi and Mohammad Mobayen and Nazanin Jafari and Oznur Tastan and Erman Ayday", title = "An Inference Attack on Genomic Data Using Kinship, Complex Correlations, and Phenotype Information", journal = j-TCBB, volume = "15", number = "4", pages = "1333--1343", month = jul, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2709740", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:45 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Individuals and their family members share partial genomic data on public platforms. However, using special characteristics of genomic data, background knowledge that can be obtained from the Web, and family relationship between the individuals, it is possible to infer the hidden parts of shared and unshared genomes. Existing work in this field considers simple correlations in the genome as well as Mendel's law and partial genomes of a victim and his family members. In this paper, we improve the existing work on inference attacks on genomic privacy. We mainly consider complex correlations in the genome by using an observable Markov model and recombination model between the haplotypes. We also utilize the phenotype information about the victims. 
We propose an efficient message passing algorithm to consider all aforementioned background information for the inference. We show that the proposed framework improves inference with significantly less information compared to existing work.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Xie:2018:AGS, author = "Qian Xie and Xiaoyan He and Fangji Yang and Xuling Liu and Ying Li and Yujing Liu and ZhengMeng Yang and Jianhai Yu and Bao Zhang and Wei Zhao", title = "Analysis of the Genome Sequence and Prediction of {B}-Cell Epitopes of the Envelope Protein of {Middle East} Respiratory Syndrome-Coronavirus", journal = j-TCBB, volume = "15", number = "4", pages = "1344--1350", month = jul, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2702588", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:45 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The outbreak of Middle East respiratory syndrome-coronavirus (MERS-CoV) in South Korea in April 2015 led to 186 infections and 37 deaths by the end of October 2015. MERS-CoV was isolated from the imported patient in China. The envelope (E) protein, a small structural protein of MERS-CoV, plays an important role in host recognition and infection. To identify the conserved epitopes of the E protein, sequence analysis was performed by comparing the E proteins from 42 MERS-CoV strains that triggered severe pandemics and infected humans in the past. To predict the potential B cell epitopes of E protein, three most effective online epitope prediction programs, the ABCpred, Bepipred, and Protean programs from the LaserGene software were used. All the nucleotides and amino acids sequences were obtained from the NCBI Database. 
One potential epitope with a suitable length (amino acids 58--82) was confirmed and predicted to be highly antigenic. This epitope had scores of {$ > 0.80 $} in ABCpred and level 0.35 in Bepipred programs. Due to the lack of X-ray crystal structure of the E protein in the PDB database, the simulated 3D structure of the E protein were also predicted using PHYRE 2 and Pymol programs. In conclusion, using bioinformatics methods, we analyzed the genome sequence of MERS-CoV and identified a potential B-cell epitope of the E protein, which might significantly improve our current MERS vaccine development strategies.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zarai:2018:CAC, author = "Yoram Zarai and Michael Margaliot and Eduardo D. Sontag and Tamir Tuller", title = "Controllability Analysis and Control Synthesis for the Ribosome Flow Model", journal = j-TCBB, volume = "15", number = "4", pages = "1351--1364", month = jul, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2707420", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:45 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The ribosomal density along different parts of the coding regions of the mRNA molecule affects various fundamental intracellular phenomena including: protein production rates, global ribosome allocation and organismal fitness, ribosomal drop off, co-translational protein folding, mRNA degradation, and more. Thus, regulating translation in order to obtain a desired ribosomal profile along the mRNA molecule is an important biological problem. We study this problem by using a dynamical model for mRNA translation, called the ribosome flow model (RFM). In the RFM, the mRNA molecule is modeled as an ordered chain of $n$ sites. 
The RFM includes $n$ state-variables describing the ribosomal density profile along the mRNA molecule, and the transition rates from each site to the next are controlled by $ n + 1$ positive constants. To study the problem of controlling the density profile, we consider some or all of the transition rates as time-varying controls. We consider the following problem: given an initial and a desired ribosomal density profile in the RFM, determine the time-varying values of the transition rates that steer the system to the desired density profile, if they exist. More specifically, we consider two control problems. In the first, all transition rates can be regulated separately, and the goal is to steer the ribosomal density profile and the protein production rate from a given initial value to a desired value. In the second problem, one or more transition rates are jointly regulated by a single scalar control, and the goal is to steer the production rate to a desired value within a certain set of feasible values. In the first case, we show that the system is controllable, i.e., the control is powerful enough to steer the system to any desired value in finite time, and provide simple closed-form expressions for constant positive control functions or transition rates that asymptotically steer the system to the desired value. In the second case, we show that the system is controllable, and provide a simple algorithm for determining the constant positive control value that asymptotically steers the system to the desired value. 
We discuss some of the biological implications of these results.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Gao:2018:ISE, author = "Shangce Gao and Shuangbao Song and Jiujun Cheng and Yuki Todo and MengChu Zhou", title = "Incorporation of Solvent Effect into Multi-Objective Evolutionary Algorithm for Improved Protein Structure Prediction", journal = j-TCBB, volume = "15", number = "4", pages = "1365--1378", month = jul, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2705094", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:45 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The problem of predicting the three-dimensional (3-D) structure of a protein from its one-dimensional sequence has been called the ``holy grail of molecular biology'', and it has become an important part of structural genomics projects. Despite the rapid developments in computer technology and computational intelligence, it remains challenging and fascinating. In this paper, to solve it we propose a multi-objective evolutionary algorithm. We decompose the protein energy function (Chemistry at HARvard Macromolecular Mechanics force fields) into bond and non-bond energies as the first and second objectives. Considering the effect of solvent, we innovatively adopt a solvent-accessible surface area as the third objective. We use 66 benchmark proteins to verify the proposed method and obtain better or competitive results in comparison with the existing methods.
The results suggest the necessity to incorporate the effect of solvent into a multi-objective evolutionary algorithm to improve protein structure prediction in terms of accuracy and efficiency.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Meyer:2018:MMP, author = "Karlene Nicole Meyer and Michelle R. Lacey", title = "Modeling Methylation Patterns with Long Read Sequencing Data", journal = j-TCBB, volume = "15", number = "4", pages = "1379--1389", month = jul, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2721943", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:45 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Variation in cytosine methylation at CpG dinucleotides is often observed in genomic regions, and analysis typically focuses on estimating the proportion of methylated sites observed in a given region and comparing these levels across samples to determine association with conditions of interest. While sites are tacitly treated as independent, when observed at the level of individual molecules methylation patterns exhibit strong evidence of local spatial dependence. We previously developed a neighboring sites model to account for correlation and clustering behavior observed in two tandem repeat regions in a collection of ovarian carcinomas. We now introduce extensions of the model that account for the effect of distance between sites as well as asymmetric correlation in de novo methylation and demethylation rates. We apply our models to published data from a whole genome bisulfite sequencing experiment using long reads, estimating model parameters for a selection of CpG-dense regions spanning between 21 and 67 sites. 
Our methods detect evidence of local spatial correlation as a function of site-to-site distance and demonstrate the added value of employing long read sequencing data in epigenetic research.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Yu:2018:NPN, author = "Guoxian Yu and Guangyuan Fu and Jun Wang and Yingwen Zhao", title = "{NewGOA}: Predicting New {GO} Annotations of Proteins by Bi-Random Walks on a Hybrid Graph", journal = j-TCBB, volume = "15", number = "4", pages = "1390--1402", month = jul, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2715842", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:45 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A remaining key challenge of modern biology is annotating the functional roles of proteins. Various computational models have been proposed for this challenge. Most of them assume the annotations of annotated proteins are complete. But in fact, many of them are incomplete. We proposed a method called NewGOA to predict new Gene Ontology (GO) annotations for incompletely annotated proteins and for completely un-annotated ones. NewGOA employs a hybrid graph, composed of two types of nodes (proteins and GO terms), to encode interactions between proteins, hierarchical relationships between terms and available annotations of proteins. To account for structural difference between GO terms subgraph and proteins subgraph, NewGOA applies a bi-random walks algorithm, which executes asynchronous random walks on the hybrid graph, to predict new GO annotations of proteins. Experimental study on archived GO annotations of two model species (H. sapiens and S. cerevisiae) shows that NewGOA can more accurately and efficiently predict new annotations of proteins than other related methods.
Experimental results also indicate the bi-random walks can explore and further exploit the structural difference between GO terms subgraph and proteins subgraph. The supplementary files and codes of NewGOA are available at: http://mlda.swu.edu.cn/codes.php?name=NewGOA.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ayday:2018:GIW, author = "Erman Ayday and Xiaoqian Jiang and Bradley Malin", title = "{GenoPri'16}: International Workshop on Genome Privacy and Security", journal = j-TCBB, volume = "15", number = "5", pages = "1403--1404", month = sep, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2856959", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:46 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Backes:2018:SLS, author = "Michael Backes and Pascal Berrang and Mathias Humbert and Xiaoyu Shen and Verena Wolf", title = "Simulating the Large-Scale Erosion of Genomic Privacy Over Time", journal = j-TCBB, volume = "15", number = "5", pages = "1405--1412", month = sep, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2859380", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:46 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The dramatically decreasing costs of DNA sequencing have triggered more than a million humans to have their genotypes sequenced. Moreover, these individuals increasingly make their genomic data publicly available, thereby creating privacy threats for themselves and their relatives because of their DNA similarities. 
More generally, an entity that gains access to a significant fraction of sequenced genotypes might be able to infer even the genomes of unsequenced individuals. In this paper, we propose a simulation-based model for quantifying the impact of continuously sequencing and publicizing personal genomic data on a population's genomic privacy. Our simulation probabilistically models data sharing and takes into account events such as migration and interracial mating. We exemplarily instantiate our simulation with a sample population of 1,000 individuals and evaluate the privacy under multiple settings over 6,000 genomic variants and a subset of phenotype-related variants. Our findings demonstrate that an increasing sharing rate in the future entails a substantial negative effect on the privacy of all older generations. Moreover, we find that mixed populations face a less severe erosion of privacy over time than more homogeneous populations. Finally, we demonstrate that genomic-data sharing can be much more detrimental for the privacy of the phenotype-related variants.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Raisaro:2018:PPS, author = "Jean Louis Raisaro and Gwangbae Choi and Sylvain Pradervand and Raphael Colsenet and Nathalie Jacquemont and Nicolas Rosat and Vincent Mooser and Jean-Pierre Hubaux", title = "Protecting Privacy and Security of Genomic Data in i2b2 with Homomorphic Encryption and Differential Privacy", journal = j-TCBB, volume = "15", number = "5", pages = "1413--1426", month = sep, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2854782", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:46 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Re-use of 
patients' health records can provide tremendous benefits for clinical research. Yet, when researchers need to access sensitive/identifying data, such as genomic data, in order to compile cohorts of well-characterized patients for specific studies, privacy and security concerns represent major obstacles that make such a procedure extremely difficult if not impossible. In this paper, we address the challenge of designing and deploying in a real operational setting an efficient privacy-preserving explorer for genetic cohorts. Our solution is built on top of the i2b2 (Informatics for Integrating Biology and the Bedside) framework and leverages cutting-edge privacy-enhancing technologies such as homomorphic encryption and differential privacy. Solutions involving homomorphic encryption are often believed to be costly and immature for use in operational environments. Here, we show that, for specific applications, homomorphic encryption is actually a very efficient enabler. Indeed, our solution outperforms prior work by enabling a researcher to securely compute simple statistics on more than 3,000 encrypted genetic variants simultaneously for a cohort of 5,000 individuals in less than 5 seconds with commodity hardware.
To the best of our knowledge, our privacy-preserving solution is the first to also be successfully deployed and tested in an operational setting (Lausanne University Hospital).", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Bogdanov:2018:IEA, author = "Dan Bogdanov and Liina Kamm and Sven Laur and Ville Sokk", title = "Implementation and Evaluation of an Algorithm for Cryptographically Private Principal Component Analysis on Genomic Data", journal = j-TCBB, volume = "15", number = "5", pages = "1427--1432", month = sep, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2858818", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:46 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We improve the quality of cryptographically privacy-preserving genome-wide association studies by correctly handling population stratification---the inherent genetic difference of patient groups, e.g., people with different ancestries. Our approach is to use principal component analysis to reduce the dimensionality of the problem so that we get less spurious correlations between traits of interest and certain positions in the genome. While this approach is commonplace in practical genomic analysis, it has not been used within a privacy-preserving setting.
In this paper, we use cryptographically secure multi-party computation to tackle principal component analysis, and present an implementation and experimental results showing the performance of the approach.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Huang:2018:GES, author = "De-Shuang Huang and Vitoantonio Bevilacqua and M. Michael Gromiha", title = "Guest Editorial for Special Section on the {12th International Conference on Intelligent Computing (ICIC)}", journal = j-TCBB, volume = "15", number = "5", pages = "1433--1435", month = sep, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2848322", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:46 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Kulandaisamy:2018:IAK, author = "A. Kulandaisamy and Ambuj Srivastava and Pradeep Kumar and R. Nagarajan and S. Binny Priya and M. Michael Gromiha", title = "Identification and Analysis of Key Residues in Protein-{RNA} Complexes", journal = j-TCBB, volume = "15", number = "5", pages = "1436--1444", month = sep, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2834387", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:46 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Protein-RNA complexes play important roles in various biological processes. The functions of protein-RNA complexes are dictated by their interactions, binding, stability, and affinity. In this work, we have identified the key residues (KRs), which are involved in both stability and binding.
We found that 42 percent of considered proteins share common binding and stabilizing residues, whereas these residues are distinct in 58 percent of the proteins. Overall, 5 percent of stabilizing and 3 percent of binding residues serve as key residues. These residues are enriched with the combination of polar, charged, aliphatic, and aromatic residues. Analysis on subclasses of protein-RNA complexes based on protein structural class, function and RNA type showed that regulatory proteins, and complexes with single stranded RNA and rRNA have appreciable number of key residues. Specifically, Arg, Tyr, and Thr are preferred in most of the subclasses of protein-RNA complexes. In addition, residues with similar chemical behavior have different preferences to be KRs, such that Arg, Tyr, Val, and Thr are preferred over Lys, Trp, Ile, and Ser, respectively. Atomic level contacts revealed that charged and polar-nonpolar contacts are dominant in enzymes, polar in structural, and nonpolar in regulatory proteins. On the other hand, polar-nonpolar contacts are enriched in all these classes of protein-RNA complexes. Further, the influence of sequence and structural features such as conservation score, surrounding hydrophobicity, solvent accessibility, secondary structure, and long-range order in key residues are also discussed. 
We envisage that the present study provides insights to understand the structural and functional aspects of protein-RNA complexes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wu:2018:UDL, author = "Hongjie Wu and Chengyuan Cao and Xiaoyan Xia and Qiang Lu", title = "Unified Deep Learning Architecture for Modeling Biology Sequence", journal = j-TCBB, volume = "15", number = "5", pages = "1445--1452", month = sep, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2760832", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:46 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Prediction of the spatial structure or function of biological macromolecules based on their sequences remains an important challenge in bioinformatics. When modeling biological sequences using traditional sequencing models, long-range interaction, complicated and variable output of labeled structures, and variable length of biological sequences usually lead to different solutions on a case-by-case basis. This study proposed a unified deep learning architecture based on long short-term memory or a gated recurrent unit to capture long-range interactions. The architecture designs the optional reshape operator to adapt to the diversity of the output labels and implements a training algorithm to support the training of sequence models capable of processing variable-length sequences. The merging and pooling operators enhance the ability of capturing short-range interactions between basic units of biological sequences. The proposed deep-learning architecture and its training algorithm might be capable of solving currently variable biological sequence-modeling problems under a unified framework.
We validated the model on one of the most difficult biological sequence-modeling problems, protein residue interaction prediction. The results indicate that the accuracy of obtaining the residue interactions of the model exceeded popular approaches by 10 percent on multiple widely-used benchmarks.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Bao:2018:MFP, author = "Wenzheng Bao and Chang-An Yuan and Youhua Zhang and Kyungsook Han and Asoke K. Nandi and Barry Honig and De-Shuang Huang", title = "Mutli-Features Prediction of Protein Translational Modification Sites", journal = j-TCBB, volume = "15", number = "5", pages = "1453--1460", month = sep, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2752703", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:46 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Post translational modification plays a significant role in the biological processing. The potential post translational modification is composed of the center sites and the adjacent amino acid residues which are fundamental protein sequence residues. It can be helpful to perform their biological functions and contribute to understanding the molecular mechanisms that are the foundations of protein design and drug design. The existing algorithms of predicting modified sites often have some shortcomings, such as lower stability and accuracy. In this paper, a combination of physical, chemical, statistical, and biological properties of a protein have been utilized as the features, and a novel framework is proposed to predict a protein's post translational modification sites. 
The multi-layer neural network and support vector machine are invoked to predict the potential modified sites with the selected features that include the compositions of amino acid residues, the E-H description of protein segments, and several properties from the AAIndex database. Being aware of the possible redundant information, the feature selection is proposed in the preprocessing step in this research. The experimental results show that the proposed method has the ability to improve the accuracy in this classification issue.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Lee:2018:SBP, author = "Wook Lee and Byungkyu Park and Kyungsook Han", title = "Sequence-Based Prediction of Putative Transcription Factor Binding Sites in {DNA} Sequences of Any Length", journal = j-TCBB, volume = "15", number = "5", pages = "1461--1469", month = sep, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2773075", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:46 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A transcription factor (TF) is a protein that regulates gene expression by binding to specific DNA sequences. Despite recent advances in experimental techniques for identifying transcription factor binding sites (TFBS) in DNA sequences, a large number of TFBS are to be unveiled in many species. Several computational methods developed for predicting TFBS in DNA are tissue- or species-specific methods, and therefore cannot be used without prior knowledge of tissue or species. Some computational methods are applicable to identifying TFBS in short DNA sequences only.
In this paper, we propose a new learning method for predicting TFBS in DNA of any length using the composition, transition, and distribution of nucleotides and amino acids in DNA and TF sequences. In independent testing of the method on datasets that were not used in training the method, the accuracy and MCC were as high as 81.84 percent and 0.634, respectively. The proposed method can be a useful aid for selecting potential TFBS in a large amount of DNA sequences before conducting biochemical experiments to empirically determine TFBS. The program and data sets are available at http://bclab.inha.ac.kr/TFbinding.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Lin:2018:PHR, author = "Xiaoli Lin and Xiaolong Zhang", title = "Prediction of Hot Regions in {PPIs} Based on Improved Local Community Structure Detecting", journal = j-TCBB, volume = "15", number = "5", pages = "1470--1479", month = sep, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2793858", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:46 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The hot regions in PPIs are some assembly regions which are composed of the tightly packed HotSpots. The discovery of hot regions helps to understand life activities and has very important value for biological applications. The identification of hot regions is the basis for protein design and cancer prevention. The existing algorithms of predicting hot regions often have some defects, such as low accuracy and instability. This paper proposes a novel hot region prediction method based on diverse biological characteristics. First, feature evaluation is employed by using an improved mRMR method. Then, SVM is adopted to create a classification model based on the features selected.
In addition, a new clustering algorithm, namely LCSD (Local community structure detecting), is developed to detect and analyze the conformation of hot regions. In the clustering process, the link similarity of protein residues is introduced to handle the boundary nodes. This algorithm can effectively deal with the missing residue nodes and control the local community boundaries. The results indicate that the spatial structure of hot regions can be obtained more effectively, and that our method is more effective than previous methods for precise identification of hot regions.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Deng:2018:IIG, author = "Su-Ping Deng and Wenxing Hu and Vince D. Calhoun and Yu-Ping Wang", title = "Integrating Imaging Genomic Data in the Quest for Biomarkers of Schizophrenia Disease", journal = j-TCBB, volume = "15", number = "5", pages = "1480--1491", month = sep, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2748944", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:46 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "It's increasingly important but difficult to determine potential biomarkers of schizophrenia (SCZ) disease, owing to the complex pathophysiology of this disease. In this study, a network-fusion based framework was proposed to identify genetic biomarkers of the SCZ disease. A three-step feature selection was applied to single nucleotide polymorphisms (SNPs), DNA methylation, and functional magnetic resonance imaging (fMRI) data to select important features, which were then used to construct two gene networks in different states for the SNPs and DNA methylation data, respectively.
Two health networks (one is for SNP data and the other is for DNA methylation data) were combined into one health network from which health minimum spanning trees (MSTs) were extracted. Two disease networks also followed the same procedures. Those genes with significant changes were determined as SCZ biomarkers by comparing MSTs in two different states and they were finally validated from five aspects. The effectiveness of the proposed discovery framework was also demonstrated by comparing with other network-based discovery methods. In summary, our approach provides a general framework for discovering gene biomarkers of the complex diseases by integrating imaging genomic data, which can be applied to the diagnosis of the complex diseases in the future.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Russo:2018:CPN, author = "Giulia Russo and Marzio Pennisi and Roberta Boscarino and Francesco Pappalardo", title = "Continuous {Petri} Nets and {microRNA} Analysis in Melanoma", journal = j-TCBB, volume = "15", number = "5", pages = "1492--1499", month = sep, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2733529", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:46 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Personalized target therapies represent one of the possible treatment strategies to fight the ongoing battle against cancer. New treatment interventions are still needed for an effective and successful cancer therapy. In this scenario, we simulated and analyzed the dynamics of BRAF V600E melanoma patients treated with BRAF inhibitors in order to find potentially interesting targets that may make standard treatments more effective in particularly aggressive tumors that may not respond to selective inhibitor drugs.
To this aim, we developed a continuous Petri Net model that simulates fundamental signalling cascades involved in melanoma development, such as MAPK and PI3K/AKT, in order to deeply analyze these complex kinase cascades and predict new crucial nodes involved in melanomagenesis. The model pointed out that some microRNAs, like hsa-mir-132, downregulate expression levels of p120RasGAP: under high concentrations of p120RasGAP, MAPK pathway activation is significantly decreased and consequently also PI3K/PDK1/AKT activation. Furthermore, our analysis carried out through the Genomic Data Commons (GDC) Data Portal shows the evidence that hsa-mir-132 is significantly associated with clinical outcome in melanoma cancer genomic data sets of BRAF-mutated patients. In conclusion, targeting miRNAs through antisense oligonucleotides technology may suggest the way to enhance the action of BRAF-inhibitors.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Liu:2018:LRS, author = "Jian Liu and Yuhu Cheng and Xuesong Wang and Xiaoluo Cui and Yi Kong and Junping Du", title = "Low Rank Subspace Clustering via Discrete Constraint and Hypergraph Regularization for Tumor Molecular Pattern Discovery", journal = j-TCBB, volume = "15", number = "5", pages = "1500--1512", month = sep, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2834371", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:46 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Tumor clustering is a powerful approach for cancer class discovery which is crucial to the effective treatment of cancer. Many traditional clustering methods, such as NMF-based models, have been widely used to identify tumors. However, they cannot achieve satisfactory results.
Recently, subspace clustering approaches have been proposed to improve the performance by dividing the original space into multiple low-dimensional subspaces. Among them, low rank representation is becoming a popular approach to attain subspace clustering. In this paper, we propose a novel Low Rank Subspace Clustering model via Discrete Constraint and Hypergraph Regularization (DHLRS). The proposed method learns the cluster indicators directly by using discrete constraint, which makes the clustering task simple. For each subspace, we adopt Schatten $p$-norm to better approximate the low rank constraint. Moreover, Hypergraph Regularization is adopted to infer the complex relationship between genes and intrinsic geometrical structure of gene expression data in each subspace. Finally, the molecular pattern of tumor gene expression data sets is discovered according to the optimized cluster indicators. Experiments on both synthetic data and real tumor gene expression data sets prove the effectiveness of proposed DHLRS.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wang:2018:GEA, author = "Lusheng Wang and Shuai Cheng Li and Yi-Ping Phoebe Chen", title = "Guest Editorial for the {15th Asia Pacific Bioinformatics Conference}", journal = j-TCBB, volume = "15", number = "5", pages = "1513--1514", month = sep, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2843838", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:46 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Paszek:2018:EAG, author = "Jaroslaw Paszek and Pawel Gorecki", title = "Efficient Algorithms for
Genomic Duplication Models", journal = j-TCBB, volume = "15", number = "5", pages = "1515--1524", month = sep, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2706679", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:46 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "An important issue in evolutionary molecular biology is to discover genomic duplication episodes and their correspondence to the species tree. Existing approaches vary in the two fundamental aspects: the choice of evolutionary scenarios that model allowed locations of duplications in the species tree, and the rules of clustering gene duplications from gene trees into a single multiple duplication event. Here we study the method of clustering called minimum episodes for several models of allowed evolutionary scenarios with a focus on interval models in which every gene duplication has an interval consisting of allowed locations in the species tree. We present mathematical foundations for general genomic duplication problems. Next, we propose the first linear time and space algorithm for minimum episodes clustering jointly for any interval model and the algorithm for the most general model in which every evolutionary scenario is allowed. We also present a comparative study of different models of genomic duplication based on simulated and empirical datasets. We provided algorithms and tools that could be applied to solve efficiently minimum episodes clustering problems. 
Our comparative study helps to identify which model is the most reasonable choice in inferring genomic duplication events.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Mizera:2018:RTS, author = "Andrzej Mizera and Jun Pang and Qixia Yuan", title = "Reviving the Two-State {Markov} Chain Approach", journal = j-TCBB, volume = "15", number = "5", pages = "1525--1537", month = sep, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2704592", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:46 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Probabilistic Boolean networks PBNs is a well-established computational framework for modelling biological systems. The steady-state dynamics of PBNs is of crucial importance in the study of such systems. However, for large PBNs, which often arise in systems biology, obtaining the steady-state distribution poses a significant challenge. In this paper, we revive the two-state Markov chain approach to solve this problem. This paper contributes in three aspects. First, we identify a problem of generating biased results with the approach and we propose a few heuristics to avoid such a pitfall. Second, we conduct an extensive experimental comparison of the extended two-state Markov chain approach and another approach based on the Skart method. We analyze the results with machine learning techniques and we show that statistically the two-state Markov chain approach has a better performance. 
Finally, we demonstrate the potential of the extended two-state Markov chain approach on a case study of a large PBN model of apoptosis in hepatocytes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{An:2018:LNN, author = "Shuai An and Jun Wang and Jinmao Wei", title = "Local-Nearest-Neighbors-Based Feature Weighting for Gene Selection", journal = j-TCBB, volume = "15", number = "5", pages = "1538--1548", month = sep, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2712775", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:46 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Selecting functional genes is essential for analyzing microarray data. Among many available feature gene selection approaches, the ones on the basis of the large margin nearest neighbor receive more attention due to their low computational costs and high accuracies in analyzing the high-dimensional data. Yet, there still exist some problems that hamper the existing approaches in sifting real target genes, including selecting erroneous nearest neighbors, high sensitivity to irrelevant genes, and inappropriate evaluation criteria. Previous pioneer works have partly addressed some of the problems, but none of them are capable of solving these problems simultaneously. In this paper, we propose a new local-nearest-neighbors-based feature weighting approach to alleviate the above problems. The proposed approach is based on the trick of locally minimizing the within-class distances and maximizing the between-class distances with the $k$ nearest neighbors rule. We further define a feature weight vector, and construct it by minimizing the cost function with a regularization term. 
The proposed approach can be applied naturally to the multi-class problems and does not require extra modification. Experimental results on the UCI and the open microarray data sets validate the effectiveness and efficiency of the new approach.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Jiang:2018:BET, author = "Nan Jiang and Wenge Rong and Yifan Nie and Yikang Shen and Zhang Xiong", title = "Biological Event Trigger Identification with Noise Contrastive Estimation", journal = j-TCBB, volume = "15", number = "5", pages = "1549--1559", month = sep, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2710048", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:46 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Biological Event Extraction is an important task towards the goal of extracting biomedical knowledge from the scientific publications by capturing biomedical entities and their complex relations from the texts. As a crucial step in event extraction, event trigger identification, assigning words with suitable trigger category, has recently attracted substantial attention. As triggers are scattered in large corpus, traditional linguistic parsers are hard to generate syntactic features from them. Thereby, trigger sparsity problem restricts the model's learning process and becomes one of the main hinder in trigger identification. In this paper, we employ Noise Contrastive Estimation with Multi-Layer Perceptron model for solving triggers' sparsity problem. Meanwhile, in the light of recent advance in word distributed representation, word-embedding feature generated by language model is utilized for semantic and syntactic information extraction. 
Finally, experimental study on commonly used MLEE dataset against baseline methods has demonstrated its promising result.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Lafond:2018:GTC, author = "Manuel Lafond and Cedric Chauve and Nadia El-Mabrouk and Aida Ouangraoua", title = "Gene Tree Construction and Correction Using {SuperTree} and Reconciliation", journal = j-TCBB, volume = "15", number = "5", pages = "1560--1570", month = sep, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2720581", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:46 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The supertree problem asking for a tree displaying a set of consistent input trees has been largely considered for the reconstruction of species trees. Here, we rather explore this framework for the sake of reconstructing a gene tree from a set of input gene trees on partial data. In this perspective, the phylogenetic tree for the species containing the genes of interest can be used to choose among the many possible compatible ``supergenetrees'', the most natural criteria being to minimize a reconciliation cost. We develop a variety of algorithmic solutions for the construction and correction of gene trees using the supertree framework. A dynamic programming supertree algorithm for constructing or correcting gene trees, exponential in the number of input trees, is first developed for the less constrained version of the problem. It is then adapted to gene trees with nodes labeled as duplication or speciation, the additional constraint being to preserve the orthology and paralogy relations between genes. 
Then, a quadratic time algorithm is developed for efficiently correcting an initial gene tree while preserving a set of ``trusted'' subtrees, as well as the relative phylogenetic distance between them, in both cases of labeled or unlabeled input trees. By applying these algorithms to the set of Ensembl gene trees, we show that this new correction framework is particularly useful to correct weakly-supported duplication nodes. The C++ source code for the algorithms and simulations described in the paper are available at https://github.com/UdeM-LBIT/SuGeT.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Mykowiecka:2018:IGS, author = "Agnieszka Mykowiecka and Pawel Szczesny and Pawel Gorecki", title = "Inferring Gene-Species Assignments in the Presence of Horizontal Gene Transfer", journal = j-TCBB, volume = "15", number = "5", pages = "1571--1578", month = sep, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2707083", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:46 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Background: Microbial communities from environmental samples show great diversity as bacteria quickly responds to changes in their ecosystems. To assess the scenario of the actual changes, metagenomics experiments aimed at sequencing genomic DNA from such samples are performed. These new obtained sequences together with already known are used to infer phylogenetic trees assessing the taxonomic groups the species with these genes belong to. Here, we propose the first approach to the gene-species assignment problem by using reconciliation with horizontal gene transfer. 
Results: We propose efficient algorithms that search for optimal gene-species mappings taking into account gene duplication, loss and transfer events under two tractable models of HGT reconciliation. Conclusions: We calculate both the optimal cost and all possible optimal scenarios. Furthermore as the number of optimal reconstructions can be large, we use a Monte-Carlo method for the inference of approximate distributions of gene-species assignments. We demonstrate the applicability on empirical and simulated datasets.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhang:2018:EMS, author = "Yue Zhang and Chunfang Zheng and David Sankoff", title = "Evolutionary Model for the Statistical Divergence of Paralogous and Orthologous Gene Pairs Generated by Whole Genome Duplication and Speciation", journal = j-TCBB, volume = "15", number = "5", pages = "1579--1584", month = sep, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2712695", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:46 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We outline a principled approach to the analysis of duplicate gene similarity distributions, based on a model integrating sequence divergence and the process of fractionation of duplicate genes resulting from whole genome duplication WGD. This model allows us to predict duplicate gene similarity distributions for a series of two or three WGD, for whole genome triplication followed by a WGD, and for triplication, followed by speciation, followed by WGD. We calculate the probabilities of all possible fates of a gene pair as its two members proliferate or are lost, predicting the number of surviving pairs from each event. 
We discuss how to calculate maximum likelihood estimators for the parameters of these models, illustrating with an analysis of the distribution of paralog similarities in the poplar genome.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Hartmann:2018:GRI, author = "Tom Hartmann and Nicolas Wieseke and Roded Sharan and Martin Middendorf and Matthias Bernt", title = "Genome Rearrangement with {ILP}", journal = j-TCBB, volume = "15", number = "5", pages = "1585--1593", month = sep, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2708121", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:46 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The weighted Genome Sorting Problem wGSP is to find a minimum-weight sequence of rearrangement operations that transforms a given gene order into another given gene order using rearrangement operations that are associated with a predefined weight. This paper presents a polynomial sized Integer Linear Program --- called GeRe-ILP --- for solving the wGSP for the following three types of rearrangement operations: inversion, transposition, and inverse transposition. GeRe-ILP uses $ O(n^3) $ variables and $ O(n^3) $ constraints for gene orders of length $n$. It is studied experimentally on simulated data how different weighting schemes influence the reconstructed scenarios. 
The influences of the length of the gene orders and of the size of the reconstructed scenarios on the runtime of GeRe-ILP are studied as well.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Sedaghat:2018:CSU, author = "Nafiseh Sedaghat and Mahmood Fathy and Mohammad Hossein Modarressi and Ali Shojaie", title = "Combining Supervised and Unsupervised Learning for Improved {miRNA} Target Prediction", journal = j-TCBB, volume = "15", number = "5", pages = "1594--1604", month = sep, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2727042", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:46 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "MicroRNAs miRNAs are short non-coding RNAs which bind to mRNAs and regulate their expression. MiRNAs have been found to be associated with initiation and progression of many complex diseases. Investigating miRNAs and their targets can thus help develop new therapies by designing anti-miRNA oligonucleotides. While existing computational approaches can predict miRNA targets, these predictions have low accuracy. In this paper, we propose a two-step approach to refine the results of sequence-based prediction algorithms. The first step, which is based on our previous work, uses an ensemble learning approach that combines multiple existing methods. The second step utilizes support vector machine SVM classifiers in one- and two-class modes to infer miRNA-mRNA interactions based on both binding features, as well as network features extracted from gene regulatory network. 
Experimental results using two real data sets from TCGA indicate that the use of two-class SVM classification significantly improves the precision of miRNA-mRNA prediction.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Hamad:2018:DWU, author = "Safwat Hamad and Ahmed Elhadad and Amal Khalifa", title = "{DNA} Watermarking Using Codon Postfix Technique", journal = j-TCBB, volume = "15", number = "5", pages = "1605--1610", month = sep, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2754496", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:46 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "DNA watermarking is a data hiding technique that aims to protect the copyright of DNA sequences and ensures the security of private genetic information. In this paper, we proposed a novel DNA watermarking technique that can be used to embed binary bits into real DNA sequences. The proposed technique mutates the codon postfix according to the embedded bit. Our method was tested for a sample set of DNA sequences and the extracted bits showed robustness against mutation. Furthermore, the proposed DNA watermarking method proved to be secured, undetectable, resistance, and preservative to biological functions.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Duarte-Sanchez:2018:HAM, author = "Jorge E. Duarte-Sanchez and Jaime Velasco-Medina and Pedro A. 
Moreno", title = "Hardware Accelerator for the Multifractal Analysis of {DNA} Sequences", journal = j-TCBB, volume = "15", number = "5", pages = "1611--1624", month = sep, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2731339", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:46 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The multifractal analysis has allowed to quantify the genetic variability and non-linear stability along the human genome sequence. It has some implications in explaining several genetic diseases given by some chromosome abnormalities, among other genetic particularities. The multifractal analysis of a genome is carried out by dividing the complete DNA sequence in smaller fragments and calculating the generalized dimension spectrum of each fragment using the chaos game representation and the box-counting method. This is a time consuming process because it involves the processing of large data sets using floating-point representation. In order to reduce the computation time, we designed an application-specific processor, here called multifractal processor, which is based on our proposed hardware-oriented algorithm for calculating efficiently the generalized dimension spectrum of DNA sequences. The multifractal processor was implemented on a low-cost SoC-FPGA and was verified by processing a complete human genome. 
The execution time and numeric results of the Multifractal processor were compared with the results obtained from the software implementation executed in a 20-core workstation, achieving a speed up of 2.6x and an average error of 0.0003 percent.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Nguyen:2018:HTN, author = "Nha Nguyen and An Vo and Haibin Sun and Heng Huang", title = "Heavy-Tailed Noise Suppression and Derivative Wavelet Scalogram for Detecting {DNA} Copy Number Aberrations", journal = j-TCBB, volume = "15", number = "5", pages = "1625--1635", month = sep, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2723884", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:46 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Most existing array comparative genomic hybridization array CGH data processing methods and evaluation models assumed that the probability density function pdf of noise in array CGH data is a Gaussian distribution. However, in practice, such noise distribution is peaky and heavy-tailed. Therefore, a Gaussian pdf is not adequate to approximate the noise in array CGH data and hence introduces wrong detections of chromosomal aberrations and leads misunderstanding on disease pathogenesis. A more accurate and sufficient model of noise in array CGH data is necessary and beneficial to the detection of DNA copy number variations. We analyze the real array CGH data from different platforms and show that the distribution of noise in array CGH data is fitted very well by generalized Gaussian distribution GGD. Based on our new noise model, we propose a novel array CGH processing method combining the advantages of both the smoothing and segmentation approaches. 
The new method uses generalized Gaussian bivariate shrinkage function and one-directional derivative wavelet scalogram in generalized Gaussian noise. In the smoothing step, with the new generalized Gaussian noise model, we derive the heavy-tailed noise suppression algorithm in stationary wavelet domain. In the segmentation step, the 1D Gaussian derivative wavelet scalogram is employed to detect break points. Both real and simulated array CGH data with different noises such as Gaussian noise, GGD noise, and real noise are used in our experiments. We demonstrate that our new method outperforms other state-of-the-art methods, in terms of both root mean squared errors and receiver operating characteristic curves.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ma:2018:ICS, author = "Xiaoke Ma and Penggang Sun and Guimin Qin", title = "Identifying Condition-Specific Modules by Clustering Multiple Networks", journal = j-TCBB, volume = "15", number = "5", pages = "1636--1648", month = sep, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2761339", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:46 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Condition-specific modules in multiple networks must be determined to reveal the underlying molecular mechanisms of diseases. Current algorithms exhibit limitations such as low accuracy and high sensitivity to the number of networks because these algorithms discover condition-specific modules in multiple networks by separating specificity and modularity of modules. 
To overcome these limitations, we characterize condition-specific module as a group of genes whose connectivity is strong in the corresponding network and weak in other networks; this strategy can accurately depict the topological structure of condition-specific modules. We then transform the condition-specific module discovery problem into a clustering problem in multiple networks. We develop an efficient heuristic algorithm for the Specific Modules in Multiple Networks SMMN, which discovers the condition-specific modules by considering multiple networks. By using the artificial networks, we demonstrate that SMMN outperforms state-of-the-art methods. In breast cancer networks, stage-specific modules discovered by SMMN are more discriminative in predicting cancer stages than those obtained by other techniques. In pan-cancer networks, cancer-specific modules are more likely to associate with survival time of patients, which is critical for cancer therapy.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Liu:2018:IAD, author = "Jin Liu and Jianxin Wang and Zhenjun Tang and Bin Hu and Fang-Xiang Wu and Yi Pan", title = "Improving {Alzheimer}'s Disease Classification by Combining Multiple Measures", journal = j-TCBB, volume = "15", number = "5", pages = "1649--1659", month = sep, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2731849", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:46 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Several anatomical magnetic resonance imaging MRI markers for Alzheimer's disease AD have been identified. 
Cortical gray matter volume, cortical thickness, and subcortical volume have been used successfully to assist the diagnosis of Alzheimer's disease including its early warning and developing stages, e.g., mild cognitive impairment MCI including MCI converted to AD MCIc and MCI not converted to AD MCInc. Currently, these anatomical MRI measures have mainly been used separately. Thus, the full potential of anatomical MRI scans for AD diagnosis might not yet have been used optimally. Meanwhile, most studies currently only focused on morphological features of regions of interest ROIs or interregional features without considering the combination of them. To further improve the diagnosis of AD, we propose a novel approach of extracting ROI features and interregional features based on multiple measures from MRI images to distinguish AD, MCI including MCIc and MCInc, and health control HC. First, we construct six individual networks based on six different anatomical measures i.e., CGMV, CT, CSA, CC, CFI, and SV and Automated Anatomical Labeling AAL atlas for each subject. Then, for each individual network, we extract all node ROI features and edge interregional features, and denoted as node feature set and edge feature set, respectively. Therefore, we can obtain six node feature sets and six edge feature sets from six different anatomical measures. Next, each feature within a feature set is ranked by $F$-score in descending order, and the top ranked features of each feature set are applied to MKBoost algorithm to obtain the best classification accuracy. After obtaining the best classification accuracy, we can get the optimal feature subset and the corresponding classifier for each node or edge feature set. Afterwards, to investigate the classification performance with only node features, we proposed a weighted multiple kernel learning wMKL framework to combine these six optimal node feature subsets, and obtain a combined classifier to perform AD classification. 
Similarly, we can obtain the classification performance with only edge features. Finally, we combine both six optimal node feature subsets and six optimal edge feature subsets to further improve the classification performance. Experimental results show that the proposed method outperforms some state-of-the-art methods in AD classification, and demonstrate that different measures contain complementary information.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ren:2018:IAS, author = "Shuai Ren and Yan Shi and Maolin Cai and Weiqing Xu", title = "Influence of Airway Secretion on Airflow Dynamics of Mechanical Ventilated Respiratory System", journal = j-TCBB, volume = "15", number = "5", pages = "1660--1668", month = sep, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2737621", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:46 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Secretions in the airways of mechanical ventilated patients are extremely dangerous to patients' health. In recent studies, the continuous constant airflow is adopted, however, it is not consistent with a clinical situation. To study respiratory airflow dynamic characteristics with secretion in the airways, a mathematical model based on clinical mechanical ventilation is established in this paper. To illustrate the secretion's influence on the airflow dynamics of mechanical ventilated respiratory system, three key parameters which are cross section area ratio of secretion/pipe, air-secretion contact area, and secretion viscosity are involved in the study. Through the experimental study, the accuracy and dependability of the model are confirmed. 
By the simulation study, we find that: based on the model which combines two airways and two model lungs, when one of the airways was covered with secretion, the maximum pressure of the model lung which is attached to the end of this airway maintains constant when the cross section area ratio is less than 66 percent, and then it tends to decline sharply with the ratio increasing, but it remains constant with the augment of air-secretion contact area, the maximum flow declines both with the increasing of cross section area ratio and air-secretion contact area. Furthermore, as for the other airway, the maximum pressure of the model lung has no significant changes with the augment of area ratio and air-secretion contact area, however, along with the increasing of area ratio and air-secretion contact area, the maximum flow rises up. Moreover, the secretion viscosity has barely any influence on airflow dynamics. According to our analysis results, we conclude that the cross section area ratio of secretion/pipe has bigger influence on airflow dynamic characteristics than air-secretion contact area and secretion viscosity. This paper lays the foundation for the further study of efficacy and safety in mechanical ventilation and the secretion clearance of mechanical ventilated patients. 
In addition, the mathematical model proposed in this paper can also be referred to study on the secretion movement in human airways.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Vijayan:2018:MNA, author = "Vipin Vijayan and Tijana Milenkovic", title = "Multiple Network Alignment via {MultiMAGNA++}", journal = j-TCBB, volume = "15", number = "5", pages = "1669--1682", month = sep, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2740381", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:46 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Network alignment NA aims to find a node mapping that identifies topologically or functionally similar network regions between molecular networks of different species. Analogous to genomic sequence alignment, NA can be used to transfer biological knowledge from well- to poorly-studied species between aligned network regions. Pairwise NA PNA finds similar regions between two networks while multiple NA MNA can align more than two networks. We focus on MNA. Existing MNA methods aim to maximize total similarity over all aligned nodes node conservation. Then, they evaluate alignment quality by measuring the amount of conserved edges, but only after the alignment is constructed. Directly optimizing edge conservation during alignment construction in addition to node conservation may result in superior alignments. Thus, we present a novel MNA method called multiMAGNA++ that can achieve this. Indeed, multiMAGNA++ outperforms or is on par with existing MNA methods, while often completing faster than existing methods. That is, multiMAGNA++ scales well to larger network data and can be parallelized effectively. 
During method evaluation, we also introduce new MNA quality measures to allow for more fair MNA method comparison compared to the existing alignment quality measures. The multiMAGNA++ code is available on the method's web page at http://nd.edu/~cone/multiMAGNA++/.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Yu:2018:NFN, author = "Chunjiang Yu and Wentao Wu and Jing Wang and Yuxin Lin and Yang Yang and Jiajia Chen and Fei Zhu and Bairong Shen", title = "{NGS-FC}: a Next-Generation Sequencing Data Format Converter", journal = j-TCBB, volume = "15", number = "5", pages = "1683--1691", month = sep, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2722442", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:46 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "With the widespread implementation of next-generation sequencing NGS technologies, millions of sequences have been produced. A lot of databases were created to store and organize the high-throughput sequencing data. Numerous analysis software programs and tools have been developed over the past years. Most of them use specific formats for data representation and storage. Data interoperability becomes a crucial challenge and many tools have been developed to convert NGS data from one format to another. However, most of them were developed for specific and limited formats. Here, we present NGS-FC Next-Generation Sequencing Format Converter, which provides a framework to support the conversion between several formats. It supports 14 formats now and provides interfaces to enable users to improve the existing converters and add new ones. 
Moreover, NGS-FC achieved the overall competitive performance in comparison with some existing converters in terms of RAM usage and running time. The software is written in Java and can be executed standalone. The source code and documentation are freely available at http://sysbio.suda.edu.cn/NGS-FC.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Jamil:2018:OPQ, author = "Hasan M. Jamil", title = "Optimizing Phylogenetic Queries for Performance", journal = j-TCBB, volume = "15", number = "5", pages = "1692--1705", month = sep, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2743706", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:46 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The vast majority of phylogenetic databases do not support declarative querying using which their contents can be flexibly and conveniently accessed and the template based query interfaces they support do not allow arbitrary speculative queries. They therefore also do not support query optimization leveraging unique phylogeny properties. While a small number of graph query languages such as XQuery, Cypher, and GraphQL exist for computer savvy users, most are too general and complex to be useful for biologists, and too inefficient for large phylogeny querying. In this paper, we discuss a recently introduced visual query language, called PhyQL, that leverages phylogeny specific properties to support essential and powerful constructs for a large class of phylogenetic queries. We develop a range of pruning aids, and propose a substantial set of query optimization strategies using these aids suitable for large phylogeny querying. A hybrid optimization technique that exploits a set of indices and ``graphlet'' partitioning is discussed.
A ``fail soonest'' strategy is used to avoid hopeless processing and is shown to produce dividends. Possible novel optimization techniques yet to be explored are also discussed.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Liu:2018:CPT, author = "Min Liu and Yue He and Weili Qian and Yangliu Wei and Xiaoyan Liu", title = "Cell Population Tracking in a Honeycomb Structure Using an {IMM} Filter Based {$3$D} Local Graph Matching Model", journal = j-TCBB, volume = "15", number = "5", pages = "1706--1717", month = sep, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2760300", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:46 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Developing algorithms for plant cell population tracking is very critical for the modeling of plant cell growth pattern and gene expression dynamics. The tracking of plant cells in microscopic image stacks is very challenging for several reasons: 1 plant cells are densely packed in a specific honeycomb structure; 2 they are frequently dividing; and 3 they are imaged in different layers within 3D image stacks. Based on an existing 2D local graph matching algorithm, this paper focuses on building a 3D plant cell matching model, by exploiting the cells' 3D spatiotemporal context. Furthermore, the Interacting Multi-Model filter IMM is combined with the 3D local graph matching model to track the plant cell population simultaneously. Because our tracking algorithm does not require the identification of ``tracking seeds'', the tracking stability and efficiency are greatly enhanced. Last, the plant cell lineages are achieved by associating the cell tracklets, using a maximum-a-posteriori MAP method. 
Compared with the 2D matching method, the experimental results on multiple datasets show that our proposed approach does not only greatly improve the tracking accuracy by 18 percent, but also successfully tracks the plant cells located at the high curvature primordial region, which is not addressed in previous work.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Chen:2018:SCF, author = "Shifu Chen and Ming Liu and Xiaoni Zhang and Renwen Long and Yixing Wang and Yue Han and Shiwei Zhang and Mingyan Xu and Jia Gu", title = "A Study of Cell-Free {DNA} Fragmentation Pattern and Its Application in {DNA} Sample Type Classification", journal = j-TCBB, volume = "15", number = "5", pages = "1718--1722", month = sep, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2723388", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:46 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Plasma cell-free DNA cfDNA has certain fragmentation patterns, which can bring non-random base content curves of the sequencing data's beginning cycles. We studied the patterns and found that we could determine whether a sample is cfDNA or not by just looking into the first 10 cycles of its base content curves. We analyzed 3,189 FastQ files, including 1,442 plasma cfDNA, 1,234 genomic DNA, 507 FFPE tumour DNA, and 6 urinary cfDNA. By deep analyzing these data, we found the patterns were stable enough to distinguish cfDNA from other kinds of DNA samples. Based on this finding, we built classification models to recognize cfDNA samples by their sequencing data. Pattern recognition models were then trained with different classification algorithms like k-nearest neighbors KNN, random forest, and support vector machine SVM. 
The result of 1,000 iteration .632+ bootstrapping showed that all these classifiers could give an average accuracy higher than 98 percent, indicating that the cfDNA patterns are unique and can make the dataset highly separable. The best result was obtained using a random forest classifier with a 99.89 percent average accuracy $ \sigma = 0.00068 $. A tool called CfdnaPattern http://github.com/OpenGene/CfdnaPattern has been developed to train the model and to predict whether a sample is cfDNA or not.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Gorecki:2018:BDG, author = "Pawel Gorecki and Oliver Eulenstein", title = "Bijective Diameters of Gene Tree Parsimony Costs", journal = j-TCBB, volume = "15", number = "5", pages = "1723--1727", month = sep, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2735968", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:46 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Synthesizing median trees from a collection of gene trees under the biologically motivated gene tree parsimony GTP costs has provided credible species tree estimates. GTP costs are defined for each of the classic evolutionary processes. These costs count the minimum number of events necessary to reconcile the gene tree with the species tree where the leaf-genes are mapped to the leaf-species through a function called labeling. To better understand the synthesis of median trees under these costs, there is an increased interest in analyzing their diameters. The diameters of a GTP cost between a gene tree and a species tree are the maximum values of this cost of one or both topologies of the trees involved. We are concerned about the diameters of the GTP costs under bijective labelings. 
While these diameters are linear time computable for the gene duplication and deep coalescence costs, this has been unknown for the classic gene duplication and loss, and for the loss cost. For the first time, we show how to compute these diameters and prove that this can be achieved in linear time, thus completing the computational time analysis for all of the bijective diameters under the GTP costs.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Li:2018:FWI, author = "Youyuan Li and Yingping Zhuang", title = "{fmpRPMF}: a {Web} Implementation for Protein Identification by Robust Peptide Mass Fingerprinting", journal = j-TCBB, volume = "15", number = "5", pages = "1728--1731", month = sep, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2762682", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:46 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Peptide mass fingerprinting continues to play an important role in current proteomics studies based on its good performance in sample throughput, specificity for single peptides, and insensitivity to unexpected post-translational modifications as compared with MSn. We previously proposed and evaluated the use of feature-matching pattern-based support vector machines (SVMs) for robust protein identification. This approach is now facilitated with an updated web server fmpRPMF incorporated with several newly developed or improved modules and workflows allowing identification of proteins from MS1 data. Development of the latest fmpRPMF web tool successfully provides a rapid and effective strategy for narrowing the range of candidate proteins.
First, a mass-scanning procedure screens all candidate proteins matching the theoretical peptide mass at least three times, thereby reducing the number of candidate proteins from tens of thousands to thousands. Second, a crude ranking procedure screens true-positive proteins among the top six ranked times of candidates based on 17 selected features to reduce the number used for SVM prediction from thousands to tens. The improvement of forecasting efficiency met the requirements of multi-user and multi-task identification for web services. The updated fmpRPMF server is freely available at http://bioinformatics.datawisdom.net/fmp.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Gonzalez-Dominguez:2018:MPC, author = "Jorge Gonzalez-Dominguez and Maria J. Martin", title = "{MPIGeneNet}: Parallel Calculation of Gene Co-Expression Networks on Multicore Clusters", journal = j-TCBB, volume = "15", number = "5", pages = "1732--1737", month = sep, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2761340", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:46 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In this work, we present MPIGeneNet, a parallel tool that applies Pearson's correlation and Random Matrix Theory to construct gene co-expression networks. It is based on the state-of-the-art sequential tool RMTGeneNet, which provides networks with high robustness and sensitivity at the expenses of relatively long runtimes for large scale input datasets. 
MPIGeneNet returns the same results as RMTGeneNet but improves the memory management, reduces the I/O cost, and accelerates the two most computationally demanding steps of co-expression network construction by exploiting the compute capabilities of common multicore CPU clusters. Our performance evaluation on two different systems using three typical input datasets shows that MPIGeneNet is significantly faster than RMTGeneNet. As an example, our tool is up to 175.41 times faster on a cluster with eight nodes, each one containing two 12-core Intel Haswell processors. The source code of MPIGeneNet, as well as a reference manual, are available at https://sourceforge.net/projects/mpigenenet/.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Shekhar:2018:STE, author = "Shubhanshu Shekhar and Sebastien Roch and Siavash Mirarab", title = "Species Tree Estimation Using {ASTRAL}: How Many Genes Are Enough?", journal = j-TCBB, volume = "15", number = "5", pages = "1738--1747", month = sep, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2757930", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:46 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Species tree reconstruction from genomic data is increasingly performed using methods that account for sources of gene tree discordance such as incomplete lineage sorting. One popular method for reconstructing species trees from unrooted gene tree topologies is ASTRAL. In this paper, we derive theoretical sample complexity results for the number of genes required by ASTRAL to guarantee reconstruction of the correct species tree with high probability. We also validate those theoretical bounds in a simulation study. 
Our results indicate that ASTRAL requires $ O(f^{-2} \log n) $ gene trees to reconstruct the species tree correctly with high probability where $n$ is the number of species and $f$ is the length of the shortest branch in the species tree. Our simulations, some under the anomaly zone, show trends consistent with the theoretical bounds and also provide some practical insights on the conditions where ASTRAL works well.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Tian:2018:GES, author = "Tianhai Tian and Jingshan Huang", title = "Guest Editorial for Special Section on {BIBM 2015}", journal = j-TCBB, volume = "15", number = "6", pages = "1752--1753", month = nov, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2870626", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 26 18:59:16 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The six papers in this special section were presented at the IEEE BIBM 2015 conference that was held in Washington, D.C., November 9-12, 2015. The scientific program highlighted five themes to provide breadth, depth, and synergy for research collaboration: 1 genomics and molecular structure, function, and evolution; 2 computational systems biology; 3 medical informatics and translational bioinformatics; 4 cross-cutting computational methods and bioinformatics infrastructures; and 5 healthcare informatics,", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Chiu:2018:ADR, author = "Yu-Chiao Chiu and Tzu-Hung Hsiao and Li-Ju Wang and Yidong Chen and Eric Y. 
Chuang", title = "Analyzing Differential Regulatory Networks Modulated by Continuous-State Genomic Features in Glioblastoma Multiforme", journal = j-TCBB, volume = "15", number = "6", pages = "1754--1764", month = nov, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2635646", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 26 18:59:16 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Gene regulatory networks are a global representation of complex interactions between molecules that dictate cellular behavior. Study of a regulatory network modulated by single or multiple modulators' expression levels, including microRNAs miRNAs and transcription factors TFs, in different conditions can further reveal the modulators' roles in diseases such as cancers. Existing computational methods for identifying such modulated regulatory networks are typically carried out by comparing groups of samples dichotomized with respect to the modulator status, ignoring the fact that most biological features are intrinsically continuous variables. Here, we devised a sliding window-based regression scheme and proposed the Regression-based Inference of Modulation RIM algorithm to infer the dynamic gene regulation modulated by continuous-state modulators. We demonstrated the improvement in performance as well as computation efficiency achieved by RIM. Applying RIM to genome-wide expression profiles of 520 glioblastoma multiforme GBM tumors, we investigated miRNA- and TF-modulated gene regulatory networks and showed their association with dynamic cellular processes and brain-related functions in GBM. 
Overall, the proposed algorithm provides an efficient and robust scheme for comprehensively studying modulated gene regulatory networks.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Hu:2018:FSO, author = "Bin Hu and Yongqiang Dai and Yun Su and Philip Moore and Xiaowei Zhang and Chengsheng Mao and Jing Chen and Lixin Xu", title = "Feature Selection for Optimized High-Dimensional Biomedical Data Using an Improved Shuffled Frog Leaping Algorithm", journal = j-TCBB, volume = "15", number = "6", pages = "1765--1773", month = nov, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2602263", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 26 18:59:16 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "High dimensional biomedical datasets contain thousands of features which can be used in molecular diagnosis of disease, however, such datasets contain many irrelevant or weak correlation features which influence the predictive accuracy of diagnosis. Without a feature selection algorithm, it is difficult for the existing classification techniques to accurately identify patterns in the features. The purpose of feature selection is to not only identify a feature subset from an original set of features [without reducing the predictive accuracy of classification algorithm] but also reduce the computation overhead in data mining. In this paper, we present our improved shuffled frog leaping algorithm which introduces a chaos memory weight factor, an absolute balance group strategy, and an adaptive transfer factor. Our proposed approach explores the space of possible subsets to obtain the set of features that maximizes the predictive accuracy and minimizes irrelevant features in high-dimensional biomedical data. 
To evaluate the effectiveness of our proposed method, we have employed the K-nearest neighbor method with a comparative analysis in which we compare our proposed approach with genetic algorithms, particle swarm optimization, and the shuffled frog leaping algorithm. Experimental results show that our improved algorithm achieves improvements in the identification of relevant subsets and in classification accuracy.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Lan:2018:PMD, author = "Wei Lan and Jianxin Wang and Min Li and Jin Liu and Fang-Xiang Wu and Yi Pan", title = "Predicting {MicroRNA}-Disease Associations Based on Improved {MicroRNA} and Disease Similarities", journal = j-TCBB, volume = "15", number = "6", pages = "1774--1782", month = nov, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2586190", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 26 18:59:16 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "MicroRNAs (miRNAs) are a type of non-coding RNAs with about 22 nucleotides (nt). Increasing evidence has shown that miRNAs play critical roles in many human diseases. The identification of human disease-related miRNAs is helpful to explore the underlying pathogenesis of diseases. More and more experimentally validated associations between miRNAs and diseases have been reported in the recent studies, which provide useful information for new miRNA-disease association discovery. In this study, we propose a computational framework, KBMF-MDI, to predict the associations between miRNAs and diseases based on their similarities. The sequence and function information of miRNAs are used to measure similarity among miRNAs while the semantic and function information of disease are used to measure similarity among diseases, respectively.
In addition, the kernelized Bayesian matrix factorization method is employed to infer potential miRNA-disease associations by integrating these data sources. We applied this method to 6,084 known miRNA-disease associations and utilized 5-fold cross validation to evaluate the performance. The experimental results demonstrate that our method can effectively predict unknown miRNA-disease associations.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Maximova:2018:SGP, author = "Tatiana Maximova and Erion Plaku and Amarda Shehu", title = "Structure-Guided Protein Transition Modeling with a Probabilistic Roadmap Algorithm", journal = j-TCBB, volume = "15", number = "6", pages = "1783--1796", month = nov, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2586044", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 26 18:59:16 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Proteins are macromolecules in perpetual motion, switching between structural states to modulate their function. A detailed characterization of the precise yet complex relationship between protein structure, dynamics, and function requires elucidating transitions between functionally-relevant states. Doing so challenges both wet and dry laboratories, as protein dynamics involves disparate temporal scales. In this paper, we present a novel, sampling-based algorithm to compute transition paths. The algorithm exploits two main ideas. First, it leverages known structures to initialize its search and define a reduced conformation space for rapid sampling. This is key to address the insufficient sampling issue suffered by sampling-based algorithms. Second, the algorithm embeds samples in a nearest-neighbor graph where transition paths can be efficiently computed via queries. 
The algorithm adapts the probabilistic roadmap framework that is popular in robot motion planning. In addition to efficiently computing lowest-cost paths between any given structures, the algorithm allows investigating hypotheses regarding the order of experimentally-known structures in a transition event. This novel contribution is likely to open up new venues of research. Detailed analysis is presented on multiple-basin proteins of relevance to human disease. Multiscaling and the AMBER ff14SB force field are used to obtain energetically-credible paths at atomistic detail.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Xu:2018:IBI, author = "Bo Xu and Hongfei Lin and Yuan Lin and Yunlong Ma and Liang Yang and Jian Wang and Zhihao Yang", title = "Improve Biomedical Information Retrieval Using Modified Learning to Rank Methods", journal = j-TCBB, volume = "15", number = "6", pages = "1797--1809", month = nov, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2578337", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 26 18:59:16 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In these years, the number of biomedical articles has increased exponentially, which becomes a problem for biologists to capture all the needed information manually. Information retrieval technologies, as the core of search engines, can deal with the problem automatically, providing users with the needed information. However, it is a great challenge to apply these technologies directly for biomedical retrieval, because of the abundance of domain specific terminologies. To enhance biomedical retrieval, we propose a novel framework based on learning to rank. 
Learning to rank is a series of state-of-the-art information retrieval techniques, and has been proved effective in many information retrieval tasks. In the proposed framework, we attempt to tackle the problem of the abundance of terminologies by constructing ranking models, which focus on not only retrieving the most relevant documents, but also diversifying the searching results to increase the completeness of the resulting list for a given query. In the model training, we propose two novel document labeling strategies, and combine several traditional retrieval models as learning features. Besides, we also investigate the usefulness of different learning to rank approaches in our framework. Experimental results on TREC Genomics datasets demonstrate the effectiveness of our framework for biomedical information retrieval.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhang:2018:DED, author = "Hongbo Zhang and Lin Zhu and De-Shuang Huang", title = "{DiscMLA}: an Efficient Discriminative {Motif} Learning Algorithm over High-Throughput Datasets", journal = j-TCBB, volume = "15", number = "6", pages = "1810--1820", month = nov, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2561930", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 26 18:59:16 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The transcription factors TFs can activate or suppress gene expression by binding to specific sites, hence are crucial regulatory elements for transcription. Recently, series of discriminative motif finders have been tailored to offering promising strategy for harnessing the power of large quantities of accumulated high-throughput experimental data. 
However, in order to achieve high speed, these algorithms have to sacrifice accuracy by employing simplified statistical models during the searching process. In this paper, we propose a novel approach named Discriminative Motif Learning via AUC (DiscMLA) to discover motifs on high-throughput datasets. Unlike previous approaches, DiscMLA tries to optimize with a more comprehensive criterion (AUC) during motifs searching. In addition, based on an experimental observation of motif identification on large-scale datasets, some novel procedures are designed to accelerate DiscMLA. The experimental results on 52 real-world datasets demonstrate that our approach substantially outperforms previous methods on discriminative motif learning problems. DiscMLA's stability, discriminability, and validity will help to exploit high-throughput datasets and answer many fundamental biological questions.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Kim:2018:ESP, author = "Sun Kim", title = "Editorial for Selected Papers of a {Joint Conferences, Genome Informatics Workshop\slash International Conference on Bioinformatics GIW\slash InCoB 2015}", journal = j-TCBB, volume = "15", number = "6", pages = "1821--1821", month = nov, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2880126", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 26 18:59:16 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The four papers in this special section were presented at the joint 2015 Genome Informatics Workshop/International Conference on Bioinformatics (GIW/InCoB).", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Hirose:2018:SCT, author = "Osamu
Hirose and Shotaro Kawaguchi and Terumasa Tokunaga and Yu Toyoshima and Takayuki Teramoto and Sayuri Kuge and Takeshi Ishihara and Yuichi Iino and Ryo Yoshida", title = "{SPF-CellTracker}: Tracking Multiple Cells with Strongly-Correlated Moves Using a Spatial Particle Filter", journal = j-TCBB, volume = "15", number = "6", pages = "1822--1831", month = nov, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2782255", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 26 18:59:16 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Tracking many cells in time-lapse 3D image sequences is an important challenging task of bioimage informatics. Motivated by a study of brain-wide 4D imaging of neural activity in C. elegans, we present a new method of multi-cell tracking. Data types to which the method is applicable are characterized as follows: i cells are imaged as globular-like objects, ii it is difficult to distinguish cells on the basis of shape and size only, iii the number of imaged cells in the several-hundred range, iv movements of nearly-located cells are strongly correlated, and v cells do not divide. We developed a tracking software suite that we call SPF-CellTracker. Incorporating dependency on the cells' movements into the prediction model is the key for reducing the tracking errors: the cell switching and the coalescence of the tracked positions. We model the target cells' correlated movements as a Markov random field and we also derive a fast computation algorithm, which we call spatial particle filter. With the live-imaging data of the nuclei of C. elegans neurons in which approximately 120 nuclei of neurons were imaged, the proposed method demonstrated improved accuracy compared to the standard particle filter and the method developed by Tokunaga et al. 
2014.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Cheng:2018:EIC, author = "Zhanzhan Cheng and Shuigeng Zhou and Yang Wang and Hui Liu and Jihong Guan and Yi-Ping Phoebe Chen", title = "Effectively Identifying Compound-Protein Interactions by Learning from Positive and Unlabeled Examples", journal = j-TCBB, volume = "15", number = "6", pages = "1832--1843", month = nov, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2016.2570211", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 26 18:59:16 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Prediction of compound-protein interactions (CPIs) is to find new compound-protein pairs where a protein is targeted by at least a compound, which is a crucial step in new drug design. Currently, a number of machine learning based methods have been developed to predict new CPIs in the literature. However, as there is not yet any publicly available set of validated negative CPIs, most existing machine learning based approaches use the unknown interactions (not validated CPIs) selected randomly as the negative examples to train classifiers for predicting new CPIs. Obviously, this is not quite reasonable and unavoidably impacts the CPI prediction performance. In this paper, we simply take the unknown CPIs as unlabeled examples, and propose a new method called PUCPI (the abbreviation of PU learning for Compound-Protein Interaction identification) that employs biased-SVM (Support Vector Machine) to predict CPIs using only positive and unlabeled examples. PU learning is a class of learning methods that learns from positive and unlabeled (PU) samples. To the best of our knowledge, this is the first work that identifies CPIs using only positive and unlabeled examples.
We first collect known CPIs as positive examples and then randomly
                 select compound-protein pairs not in the positive set
                 as unlabeled examples. For each CPI/compound-protein
                 pair, we extract protein domains as protein features
                 and compound substructures as chemical features, then
                 take the tensor product of the corresponding compound
                 features and protein features as the feature vector of
                 the CPI/compound-protein pair. After that, biased-SVM
                 is employed to train classifiers on different datasets
                 of CPIs and compound-protein pairs. Experiments over
                 various datasets show that our method outperforms six
                 typical classifiers, including random forest, L1- and
                 L2-regularized logistic regression, naive Bayes, SVM
                 and $k$-nearest neighbor (kNN), and three types of
                 existing CPI prediction models. More information can be
                 found at
                 http://admis.fudan.edu.cn/projects/pucpi.html.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Ismail:2018:RNR,
  author =       "Hamid D. Ismail and Hiroto Saigo and Dukka B. KC",
  title =        "{RF-NR}: Random Forest Based Approach for Improved
                 Classification of Nuclear Receptors",
  journal =      j-TCBB,
  volume =       "15",
  number =       "6",
  pages =        "1844--1852",
  month =        nov,
  year =         "2018",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2017.2773063",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Dec 26 18:59:16 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "The Nuclear Receptor (NR) superfamily plays an
                 important role in key biological, developmental, and
                 physiological processes. Developing a method for the
                 classification of NR proteins is an important step
                 towards understanding the structure and functions of
                 the newly discovered NR protein.
The recent studies on NR classification are either unable to achieve
                 optimum accuracy or are not designed for all the known
                 NR subfamilies. In this study, we developed RF-NR,
                 which is a Random Forest based approach for improved
                 classification of nuclear receptors. The RF-NR can
                 predict whether a query protein sequence belongs to one
                 of the eight NR subfamilies or it is a non-NR sequence.
                 The RF-NR uses spectrum-like features namely: Amino
                 Acid Composition, Di-peptide Composition, and
                 Tripeptide Composition. Benchmarking on two independent
                 datasets with varying sequence redundancy reduction
                 criteria, the RF-NR achieves better or comparable
                 accuracy than other existing methods. The added
                 advantage of our approach is that we can also obtain
                 biological insights about the important features that
                 are required to classify NR subfamilies. RF-NR is
                 freely available at http://bcb.ncat.edu/RF_NR/.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Tamura:2018:CMR,
  author =       "Takeyuki Tamura and Wei Lu and Jiangning Song and
                 Tatsuya Akutsu",
  title =        "Computing Minimum Reaction Modifications in a
                 {Boolean} Metabolic Network",
  journal =      j-TCBB,
  volume =       "15",
  number =       "6",
  pages =        "1853--1862",
  month =        nov,
  year =         "2018",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2017.2777456",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Dec 26 18:59:16 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "In metabolic network modification, we newly add
                 enzymes or/and knock-out genes to maximize the biomass
                 production with minimum side-effect. Although this
                 problem has been studied for various problem settings
                 via mathematical models including flux balance
                 analysis, elementary mode, and Boolean models, some
                 important problem settings still remain to be studied.
In this paper, we consider the Boolean Reaction Modification (BRM)
                 problem, where a host metabolic network and a reference
                 metabolic network are given in the Boolean model. The
                 host network initially produces some toxic compounds
                 and cannot produce some necessary compounds, but the
                 reference network can produce the necessary compounds,
                 and we should minimize the total number of removed
                 reactions from the host network and added reactions
                 from the reference network so that the toxic compounds
                 are not producible, but the necessary compounds are
                 producible in the resulting host network. We developed
                 integer linear programming (ILP)-based methods for BRM,
                 and compared them with OptStrain and SimOptStrain. The
                 results show that our method performed better for
                 reducing the total number of added and removed
                 reactions, while OptStrain and SimOptStrain performed
                 better for optimizing the production of the target
                 compound. Our developed software is freely available at
                 ``http://sunflower.kuicr.kyoto-u.ac.jp/~rogi/solBRM/solBRM.html''.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Tan:2018:SSS,
  author =       "Ying Tan and Yuhui Shi",
  title =        "Special Section on Swarm-Based Algorithms and
                 Applications in Computational Biology and
                 Bioinformatics",
  journal =      j-TCBB,
  volume =       "15",
  number =       "6",
  pages =        "1863--1864",
  month =        nov,
  year =         "2018",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2018.2879422",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Dec 26 18:59:16 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "The seven papers in this special section were
                 presented at the ICSI 2016 Conference.
These articles are primarily dealing with either novel bioinspired
                 swarm intelligence algorithms and their improvements as
                 well as some practical applications in multi-objective
                 optimization, network community detection, curve
                 fitting, and swarm robotics, etc.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Niu:2018:CSR,
  author =       "Ben Niu and Jing Liu and Teresa Wu and Xianghua Chu
                 and Zhengxu Wang and Yanmin Liu",
  title =        "Coevolutionary Structure-Redesigned-Based Bacterial
                 Foraging Optimization",
  journal =      j-TCBB,
  volume =       "15",
  number =       "6",
  pages =        "1865--1876",
  month =        nov,
  year =         "2018",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2017.2742946",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Dec 26 18:59:16 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "This paper presents a Coevolutionary
                 Structure-Redesigned-Based Bacteria Foraging
                 Optimization (CSRBFO) based on the natural phenomenon
                 that most living creatures tend to cooperate with each
                 other so as to fulfill tasks more effectively. Aiming
                 at lowering computational complexity while maintaining
                 the critical search capability of standard bacterial
                 foraging optimization (BFO), we employ a general loop
                 to replace the nested loop and eliminate the
                 reproduction step of BFO. Hence, the proposed CSRBFO
                 only consists of two main steps: (1) chemotaxis and (2)
                 elimination \& dispersal. A coevolutionary strategy by
                 which all bacteria can learn from each other and search
                 for optima cooperatively is incorporated into the
                 chemotactic step to accelerate convergence and
                 facilitate accurate search. In the elimination \&
                 dispersal step, the three-stage evolutionary strategy
                 with different learning methods for maintaining
                 diversity is studied.
An evaluation of the convergence status is then added to determine
                 whether bacteria should move on to the next stage or
                 not. The combination of coevolutionary strategy and
                 convergence status evaluation is expected to balance
                 exploration and exploitation. Experimental results
                 comparing seven well-known heuristic algorithms on 24
                 benchmark functions demonstrate that the proposed
                 CSRBFO outperforms the comparison algorithms
                 significantly in most of the cases.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Xu:2018:ESB,
  author =       "Biao Xu and Yong Zhang and Dunwei Gong and Yinan Guo
                 and Miao Rong",
  title =        "Environment Sensitivity-Based Cooperative
                 Co-Evolutionary Algorithms for Dynamic Multi-Objective
                 Optimization",
  journal =      j-TCBB,
  volume =       "15",
  number =       "6",
  pages =        "1877--1890",
  month =        nov,
  year =         "2018",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2017.2652453",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Dec 26 18:59:16 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Dynamic multi-objective optimization problems (DMOPs)
                 not only involve multiple conflicting objectives, but
                 these objectives may also vary with time, raising a
                 challenge for researchers to solve them. This paper
                 presents a cooperative co-evolutionary strategy based
                 on environment sensitivities for solving DMOPs. In this
                 strategy, a new method that groups decision variables
                 is first proposed, in which all the decision variables
                 are partitioned into two subcomponents according to
                 their interrelation with environment. Adopting two
                 populations to cooperatively optimize the two
                 subcomponents, two prediction methods, i.e.,
                 differential prediction and Cauchy mutation, are then
                 employed respectively to speed up their responses on
                 the change of the environment.
Furthermore, two improved dynamic multi-objective optimization
                 algorithms, i.e., DNSGAII-CO and DMOPSO-CO, are
                 proposed by incorporating the above strategy into
                 NSGA-II and multi-objective particle swarm
                 optimization, respectively. The proposed algorithms are
                 compared with three state-of-the-art algorithms by
                 applying to seven benchmark DMOPs. Experimental results
                 reveal that the proposed algorithms significantly
                 outperform the compared algorithms in terms of
                 convergence and distribution on most DMOPs.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Guo:2018:RDM,
  author =       "Yi-Nan Guo and Jian Cheng and Sha Luo and Dunwei Gong
                 and Yu Xue",
  title =        "Robust Dynamic Multi-Objective Vehicle Routing
                 Optimization Method",
  journal =      j-TCBB,
  volume =       "15",
  number =       "6",
  pages =        "1891--1903",
  month =        nov,
  year =         "2018",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2017.2685320",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Dec 26 18:59:16 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "For dynamic multi-objective vehicle routing problems,
                 the waiting time of vehicle, the number of serving
                 vehicles, and the total distance of routes were
                 normally considered as the optimization objectives.
                 Except for the above objectives, fuel consumption that
                 leads to the environmental pollution and energy
                 consumption was focused on in this paper. Considering
                 the vehicles' load and the driving distance, a
                 corresponding carbon emission model was built and set
                 as an optimization objective. Dynamic multi-objective
                 vehicle routing problems with hard time windows and
                 randomly appeared dynamic customers, subsequently, were
                 modeled. In existing planning methods, when the new
                 service demand came up, global vehicle routing
                 optimization method was triggered to find the optimal
                 routes for non-served customers, which was
                 time-consuming. Therefore, a robust dynamic
                 multi-objective vehicle routing method with two-phase
                 is proposed. Three highlights of the novel method are:
                 (i) After finding optimal robust virtual routes for all
                 customers by adopting multi-objective particle swarm
                 optimization in the first phase, static vehicle routes
                 for static customers are formed by removing all dynamic
                 customers from robust virtual routes in next phase.
                 (ii) The dynamically appeared customers append to be
                 served according to their service time and the
                 vehicles' statuses. Global vehicle routing optimization
                 is triggered only when no suitable locations can be
                 found for dynamic customers. (iii) A metric measuring
                 the algorithms robustness is given. The statistical
                 results indicated that the routes obtained by the
                 proposed method have better stability and robustness,
                 but may be sub-optimum.
Moreover, time-consuming global vehicle routing optimization is
                 avoided as dynamic customers appear.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Guo:2018:GPS,
  author =       "Weian Guo and Chengyong Si and Yu Xue and Yanfen Mao
                 and Lei Wang and Qidi Wu",
  title =        "A Grouping Particle Swarm Optimizer with
                 Personal-Best-Position Guidance for Large Scale
                 Optimization",
  journal =      j-TCBB,
  volume =       "15",
  number =       "6",
  pages =        "1904--1915",
  month =        nov,
  year =         "2018",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2017.2701367",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Dec 26 18:59:16 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Particle Swarm Optimization (PSO) is a popular
                 algorithm which is widely investigated and well
                 implemented in many areas. However, the canonical PSO
                 does not perform well in population diversity
                 maintenance so that usually leads to a premature
                 convergence or local optima. To address this issue, we
                 propose a variant of PSO named Grouping PSO with
                 Personal-Best-Position ($ P_{best} $) Guidance
                 (GPSO-PG) which maintains the population diversity by
                 preserving the diversity of exemplars. On one hand, we
                 adopt uniform random allocation strategy to assign
                 particles into different groups and in each group the
                 losers will learn from the winner. On the other hand,
                 we employ personal historical best position of each
                 particle in social learning rather than the current
                 global best particle. In this way, the exemplars
                 diversity increases and the effect from the global best
                 particle is eliminated. We test the proposed algorithm
                 to the benchmarks in CEC 2008 and CEC 2010, which
                 concern the large scale optimization problems (LSOPs).
By comparing several current peer algorithms, GPSO-PG exhibits a
                 competitive performance to maintain population
                 diversity and obtains a satisfactory performance to the
                 problems.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Gao:2018:NCD,
  author =       "Chao Gao and Mingxin Liang and Xianghua Li and Zili
                 Zhang and Zhen Wang and Zhili Zhou",
  title =        "Network Community Detection Based on the
                 {Physarum}-Inspired Computational Framework",
  journal =      j-TCBB,
  volume =       "15",
  number =       "6",
  pages =        "1916--1928",
  month =        nov,
  year =         "2018",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2016.2638824",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Dec 26 18:59:16 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Community detection is a crucial and essential problem
                 in the structure analytics of complex networks, which
                 can help us understand and predict the characteristics
                 and functions of complex networks. Many methods,
                 ranging from the optimization-based algorithms to the
                 heuristic-based algorithms, have been proposed for
                 solving such a problem. Due to the inherent complexity
                 of identifying network structure, how to design an
                 effective algorithm with a higher accuracy and a lower
                 computational cost still remains an open problem.
                 Inspired by the computational capability and positive
                 feedback mechanism in the wake of foraging process of
                 Physarum, a kind of slime, a general Physarum-based
                 computational framework for community detection is
                 proposed in this paper.
Based on the proposed framework, the inter-community edges can be
                 identified from the intra-community edges in a network
                 and the positive feedback of solving process in an
                 algorithm can be further enhanced, which are used to
                 improve the efficiency of original optimization-based
                 and heuristic-based community detection algorithms,
                 respectively. Some typical algorithms (e.g., genetic
                 algorithm, ant colony optimization algorithm, and
                 Markov clustering algorithm) and real-world datasets
                 have been used to estimate the efficiency of our
                 proposed computational framework. Experiments show that
                 the algorithms optimized by Physarum-inspired
                 computational framework perform better than the
                 original ones, in terms of accuracy and computational
                 cost.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Iglesias:2018:IAF,
  author =       "Andres Iglesias and Akemi Galvez and Andreina Avila",
  title =        "Immunological Approach for Full {NURBS} Reconstruction
                 of Outline Curves from Noisy Data Points in Medical
                 Imaging",
  journal =      j-TCBB,
  volume =       "15",
  number =       "6",
  pages =        "1929--1942",
  month =        nov,
  year =         "2018",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2017.2688444",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Dec 26 18:59:16 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Curve reconstruction from data points is an important
                 issue for advanced medical imaging techniques, such as
                 computer tomography (CT) and magnetic resonance imaging
                 (MRI). The most powerful fitting functions for this
                 purpose are the NURBS (non-uniform rational B-splines).
                 Solving the general reconstruction problem with NURBS
                 requires computing all free variables of the problem
                 (data parameters, breakpoints, control points, and
                 their weights).
This leads to a very difficult non-convex, nonlinear,
                 high-dimensional, multimodal, and continuous
                 optimization problem. Previous methods simplify the
                 problem by guessing the values for some variables and
                 computing only the remaining ones. As a result,
                 unavoidable approximations errors are introduced. In
                 this paper, we describe the first method in the
                 literature to solve the full NURBS curve reconstruction
                 problem in all its generality. Our method is based on a
                 combination of two techniques: an immunological
                 approach to perform data parameterization, breakpoint
                 placement, and weight calculation, and least squares
                 minimization to compute the control points. This
                 procedure is repeated iteratively until no further
                 improvement is achieved for higher accuracy. The method
                 has been applied to reconstruct some outline curves
                 from MRI brain images with satisfactory results.
                 Comparative work shows that our method outperforms the
                 previous related approaches in the literature for all
                 instances in our benchmark.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Tang:2018:SRS,
  author =       "Qirong Tang and Lu Ding and Fangchao Yu and Yuan Zhang
                 and Yinghao Li and Haibo Tu",
  title =        "Swarm Robots Search for Multiple Targets Based on an
                 Improved Grouping Strategy",
  journal =      j-TCBB,
  volume =       "15",
  number =       "6",
  pages =        "1943--1950",
  month =        nov,
  year =         "2018",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2017.2682161",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Dec 26 18:59:16 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Swarm robots search for multiple targets in
                 collaboration in unknown environments has been
                 addressed in this paper. An improved grouping strategy
                 based on constriction factors Particle Swarm
                 Optimization is proposed.
Robots are grouped under this strategy after several iterations of
                 stochastic movements, which considers the influence
                 range of targets and environmental information they
                 have sensed. The group structure may change dynamically
                 and each group focuses on searching one target. All
                 targets are supposed to be found finally. Obstacle
                 avoidance is considered during the search process.
                 Simulation compared with previous method demonstrates
                 the adaptability, accuracy, and efficiency of the
                 proposed strategy in multiple targets searching.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Gao:2018:MMA,
  author =       "Xin Gao and Jake Y. Chen and Mohammed J. Zaki",
  title =        "Multiscale and Multimodal Analysis for Computational
                 Biology",
  journal =      j-TCBB,
  volume =       "15",
  number =       "6",
  pages =        "1951--1952",
  month =        nov,
  year =         "2018",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2018.2838658",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Dec 26 18:59:16 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "The papers in this special section were presented at
                 the 16th International Workshop on Data Mining in
                 Bioinformatics (BIOKDD17). The BIOKDD17 Workshop was
                 organized in conjunction with the ACM SIGKDD
                 International Conference on Knowledge Discovery and
                 Data Mining on August 14, 2017 in Halifax, Canada. It
                 brought together international researchers in the
                 interacting disciplines of data mining, medical
                 informatics, and bioinformatics at the World Trade and
                 Convention Centre venue. The goal of this workshop is
                 to encourage Knowledge Discovery and Data mining (KDD)
                 researchers to take on the numerous challenges that
                 bioinformatics offers. Bioinformatics is the science of
                 managing, mining, and interpreting information from
                 biological data.
Various genome projects have contributed to an exponential growth in
                 DNA and protein sequence databases. Rapid advances in
                 high-throughput technologies, such as microarrays, mass
                 spectrometry, and new/next-generation sequencing, can
                 monitor quantitatively the presence or activity of
                 thousands of genes, RNAs, proteins, metabolites, and
                 compounds in a given biological state. The ongoing
                 influx of these data, the pressing need to address
                 complex biomedical challenges, and the gap between the
                 two have collectively created exciting opportunities
                 for data mining researchers.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Monteiro:2018:UML,
  author =       "Miguel Monteiro and Ana Catarina Fonseca and Ana
                 Teresa Freitas and Teresa {Pinho e Melo} and Alexandre
                 P. Francisco and Jose M. Ferro and Arlindo L.
                 Oliveira",
  title =        "Using Machine Learning to Improve the Prediction of
                 Functional Outcome in Ischemic Stroke Patients",
  journal =      j-TCBB,
  volume =       "15",
  number =       "6",
  pages =        "1953--1959",
  month =        nov,
  year =         "2018",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2018.2811471",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Dec 26 18:59:16 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Ischemic stroke is a leading cause of disability and
                 death worldwide among adults. The individual prognosis
                 after stroke is extremely dependent on treatment
                 decisions physicians take during the acute phase. In
                 the last five years, several scores such as the ASTRAL,
                 DRAGON, and THRIVE have been proposed as tools to help
                 physicians predict the patient functional outcome after
                 a stroke. These scores are rule-based classifiers that
                 use features available when the patient is admitted to
                 the emergency room.
In this paper, we apply machine learning techniques to the problem of
                 predicting the functional outcome of ischemic stroke
                 patients, three months after admission. We show that a
                 pure machine learning approach achieves only a
                 marginally superior Area Under the ROC Curve (AUC)
                 ($ 0.808 \pm 0.085 $) than that of the best score
                 ($ 0.771 \pm 0.056 $) when using the features available
                 at admission. However, we observed that by
                 progressively adding features available at further
                 points in time, we can significantly increase the AUC
                 to a value above 0.90. We conclude that the results
                 obtained validate the use of the scores at the time of
                 admission, but also point to the importance of using
                 more features, which require more advanced methods,
                 when possible.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Wang:2018:AMR,
  author =       "Annie Wang and Hansaim Lim and Shu-Yuan Cheng and Lei
                 Xie",
  title =        "{ANTENNA}, a Multi-Rank, Multi-Layered Recommender
                 System for Inferring Reliable Drug-Gene-Disease
                 Associations: Repurposing Diazoxide as a Targeted
                 Anti-Cancer Therapy",
  journal =      j-TCBB,
  volume =       "15",
  number =       "6",
  pages =        "1960--1967",
  month =        nov,
  year =         "2018",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2018.2812189",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Dec 26 18:59:16 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Existing drug discovery processes follow a
                 reductionist model of
                 ``one-drug-one-gene-one-disease,'' which is inadequate
                 to tackle complex diseases involving multiple
                 malfunctioned genes. The availability of big omics data
                 offers opportunities to transform drug discovery
                 process into a new paradigm of systems pharmacology
                 that focuses on designing drugs to target molecular
                 interaction networks instead of a single gene.
Here, we develop a reliable multi-rank, multi-layered recommender
                 system, ANTENNA, to mine large-scale chemical genomics
                 and disease association data for prediction of novel
                 drug-gene-disease associations. ANTENNA integrates a
                 novel tri-factorization based dual-regularized weighted
                 and imputed One Class Collaborative Filtering (OCCF)
                 algorithm, tREMAP, with a statistical framework based
                 on Random Walk with Restart and assess the reliability
                 of specific predictions. In the benchmark, tREMAP
                 clearly outperforms the single-rank OCCF. We apply
                 ANTENNA to a real-world problem: repurposing old drugs
                 for new clinical indications without effective
                 treatments. We discover that FDA-approved drug
                 diazoxide can inhibit multiple kinase genes responsible
                 for many diseases including cancer and kill triple
                 negative breast cancer (TNBC) cells efficiently
                 ($ {\text {IC}}_{50} = {{0.87}} \, {{\mu } \text {M}} $).
                 TNBC is a deadly disease without effective targeted
                 therapies. Our finding demonstrates the power of big
                 data analytics in drug discovery and developing a
                 targeted therapy for TNBC.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Wang:2018:PHR,
  author =       "Haishuai Wang and Zhicheng Cui and Yixin Chen and
                 Michael Avidan and Arbi {Ben Abdallah} and Alexander
                 Kronzer",
  title =        "Predicting Hospital Readmission via Cost-Sensitive
                 Deep Learning",
  journal =      j-TCBB,
  volume =       "15",
  number =       "6",
  pages =        "1968--1978",
  month =        nov,
  year =         "2018",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2018.2827029",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Dec 26 18:59:16 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "With increased use of electronic medical records
                 (EMRs), data mining on medical data has great potential
                 to improve the quality of hospital treatment and
                 increase the survival rate of patients. Early
                 readmission prediction enables early intervention,
                 which is essential to preventing serious or
                 life-threatening events, and act as a substantial
                 contributor to reduce healthcare costs. Existing works
                 on predicting readmission often focus on certain vital
                 signs and diseases by extracting statistical features.
                 They also fail to consider skewness of class labels in
                 medical data and different costs of misclassification
                 errors. In this paper, we recur to the merits of
                 convolutional neural networks (CNN) to automatically
                 learn features from time series of vital sign, and
                 categorical feature embedding to effectively encode
                 feature vectors with heterogeneous clinical features,
                 such as demographics, hospitalization history, vital
                 signs, and laboratory tests. Then, both learnt features
                 via CNN and statistical features via feature embedding
                 are fed into a multilayer perceptron (MLP) for
                 prediction. We use a cost-sensitive formulation to
                 train MLP during prediction to tackle the imbalance and
                 skewness challenge. We validate the proposed approach
                 on two real medical datasets from Barnes-Jewish
                 Hospital, and all data is taken from historical EMR
                 databases and reflects the kinds of data that would
                 realistically be available at the clinical prediction
                 system in hospitals. We find that early prediction of
                 readmission is possible and when compared with
                 state-of-the-art existing methods used by hospitals,
                 our methods perform significantly better. For example,
                 using the general hospital wards data for 30-day
                 readmission prediction, the area under the curve (AUC)
                 for the proposed model was 0.70, significantly higher
                 than all the baseline methods.
Based on these results, a system is being deployed in hospital
                 settings with the proposed forecasting algorithms to
                 support treatment.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Halioui:2018:BWE,
  author =       "Ahmed Halioui and Petko Valtchev and Abdoulaye Banire
                 Diallo",
  title =        "Bioinformatic Workflow Extraction from Scientific
                 Texts based on Word Sense Disambiguation",
  journal =      j-TCBB,
  volume =       "15",
  number =       "6",
  pages =        "1979--1990",
  month =        nov,
  year =         "2018",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2018.2847336",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Dec 26 18:59:16 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "This paper introduces a method for automatic workflow
                 extraction from texts using Process-Oriented Case-Based
                 Reasoning (POCBR). While the current workflow
                 management systems implement mostly different
                 complicated graphical tasks based on advanced
                 distributed solutions (e.g., cloud computing and grid
                 computation), workflow knowledge acquisition from texts
                 using case-based reasoning represents more expressive
                 and semantic case representations. We propose in this
                 context, an ontology-based workflow extraction
                 framework to acquire processual knowledge from texts.
                 Our methodology extends the classic NLP techniques to
                 extract and disambiguate complex tasks and relations in
                 texts. Using a graph-based representation of workflows
                 and a domain ontology, our extraction process uses a
                 context-aware approach to recognize workflow components
                 in texts: data and control flows. We applied our
                 framework in a technical domain in bioinformatics:
                 i.e., phylogenetic analyses. An evaluation based on
                 workflow semantic similarities in a gold standard
                 proves that our approach provides promising results in
                 the process extraction domain.
Both data and implementation of our framework are available in:
                 http://labo.bioinfo.uqam.ca/tgowler.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Yue:2018:SGS,
  author =       "Zongliang Yue and Michael T. Neylon and Thanh Nguyen
                 and Timothy Ratliff and Jake Y. Chen",
  title =        "{``Super Gene Set''} Causal Relationship Discovery
                 from Functional Genomics Data",
  journal =      j-TCBB,
  volume =       "15",
  number =       "6",
  pages =        "1991--1998",
  month =        nov,
  year =         "2018",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2018.2858755",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Dec 26 18:59:16 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "In this article, we present a computational framework
                 to identify ``causal relationships'' among super gene
                 sets. For ``causal relationships,'' we refer to both
                 stimulatory and inhibitory regulatory relationships,
                 regardless of through direct or indirect mechanisms.
                 For super gene sets, we refer to ``pathways, annotated
                 lists, and gene signatures,'' or PAGs. To identify
                 causal relationships among PAGs, we extend the previous
                 work on identifying PAG-to-PAG regulatory relationships
                 by further requiring them to be significantly enriched
                 with gene-to-gene co-expression pairs across the two
                 PAGs involved. This is achieved by developing a
                 quantitative metric based on PAG-to-PAG Co-expressions
                 (PPC), which we use to infer the likelihood that
                 PAG-to-PAG relationships under examination are
                 causal---either stimulatory or inhibitory. Since true
                 causal relationships are unknown, we approximate the
                 overall performance of inferring causal relationships
                 with the performance of recalling known r-type
                 PAG-to-PAG relationships from causal PAG-to-PAG
                 inference, using a functional genomics benchmark
                 dataset from the GEO database.
We report the area-under-curve AUC performance for both precision and recall being 0.81. By applying our framework to a myeloid-derived suppressor cells MDSC dataset, we further demonstrate that this framework is effective in helping build multi-scale biomolecular systems models with new insights on regulatory and causal links for downstream biological interpretations.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Alazmi:2018:SBD, author = "Meshari Alazmi and Ahmed Abbas and Xianrong Guo and Ming Fan and Lihua Li and Xin Gao", title = "A Slice-based $^{13}$C-detected {NMR} Spin System Forming and Resonance Assignment Method", journal = j-TCBB, volume = "15", number = "6", pages = "1999--2008", month = nov, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2849728", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 26 18:59:16 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Nuclear magnetic resonance NMR spectroscopy is attracting more attention in the field of computational structural biology. Till recently, $^1$H-detected experiments are the dominant NMR technique used due to the high sensitivity of $^1$H nuclei. However, the current availability of high magnetic fields and cryogenically cooled probe heads allow researchers to overcome the low sensitivity of $^{13}$C nuclei. Consequently, $^{13}$C-detected experiments have become a popular technique in different NMR applications especially resonance assignment and structure determination of large proteins. In this paper, we propose the first spin system forming method for $^{13}$C-detected NMR spectra. Our method is able to accurately form spin systems based on as few as two $^{13}$C-detected spectra, CBCACON, and CBCANCO. 
Our method picks slices from the more trusted spectrum and uses them as feedback to direct the slice picking in the less trusted one. This feedback leads to picking the accurate slices that consequently helps to form better spin systems. We tested our method on a real dataset of `Ubiquitin' and a benchmark simulated dataset consisting of 12 proteins. We fed our spin systems as inputs to a genetic algorithm to generate the chemical shift assignment, and obtained 92 percent correct chemical shift assignment for Ubiquitin. For the simulated dataset, we obtained an average recall of 86 percent and an average precision of 88 percent. Finally, our chemical shift assignment of Ubiquitin was given as an input to CS-ROSETTA server that generated structures close to the experimentally determined structure.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Mukund:2018:CEN, author = "Kavitha Mukund and Samuel R. Ward and Richard L. Lieber and Shankar Subramaniam", title = "Co-Expression Network Approach to Studying the Effects of {Botulinum Neurotoxin-A}", journal = j-TCBB, volume = "15", number = "6", pages = "2009--2016", month = nov, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2763949", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 26 18:59:16 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Botulinum Neurotoxin A BoNT-A is a potent neurotoxin with several clinical applications. The goal of this study was to utilize co-expression network theory to analyze temporal transcriptional data from skeletal muscle after BoNT-A treatment. Expression data for 2000 genes extracted using a ranking heuristic served as the basis for this analysis. 
Using weighted gene co-expression network analysis (WGCNA), we identified 19 co-expressed modules, further hierarchically clustered into five groups. Quantifying average expression and co-expression patterns across these groups revealed temporal aspects of muscle's response to BoNT-A. Functional analysis revealed enrichment of group 1 with metabolism; group 5 with contradictory functions of atrophy and cellular recovery; and groups 2 and 3 with extracellular matrix (ECM) and non-fast fiber isoforms. Topological positioning of two highly ranked, significantly expressed genes---Dclk1 and Ostalpha---within group 5 suggested possible mechanistic roles in recovery from BoNT-A induced atrophy. Phenotypic correlations of groups with titin and myosin protein content further emphasized the effect of BoNT-A on the sarcomeric contraction machinery in early phase of chemodenervation. In summary, our approach revealed a hierarchical functional response to BoNT-A induced paralysis with early metabolic and later ECM responses and identified putative biomarkers associated with chemodenervation. Additionally, our results provide an unbiased validation of the response documented in our previous work.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Martins:2018:CMT, author = "Daniel P.
Martins and Michael Taynnan Barros and Massimiliano Pierobon and Meenakshisundaram Kandhavelu and Pietro Lio' and Sasitharan Balasubramaniam", title = "Computational Models for Trapping {Ebola} Virus Using Engineered Bacteria", journal = j-TCBB, volume = "15", number = "6", pages = "2017--2027", month = nov, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2836430", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 26 18:59:16 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The outbreak of the Ebola virus in recent years has resulted in numerous research initiatives to seek new solutions to contain the virus. A number of approaches that have been investigated include new vaccines to boost the immune system. An alternative post-exposure treatment is presented in this paper. The proposed approach for clearing the Ebola virus can be developed through a microfluidic attenuator, which contains the engineered bacteria that traps Ebola flowing through the blood onto its membrane. The paper presents the analysis of the chemical binding force between the virus and a genetically engineered bacterium considering the opposing forces acting on the attachment point, including hydrodynamic tension and drag force. To test the efficacy of the technique, simulations of bacterial motility within a confined area to trap the virus were performed. More than 60 percent of the displaced virus could be collected within 15 minutes. 
While the proposed approach currently focuses on in vitro environments for trapping the virus, the system can be further developed into a future treatment system whereby blood can be cycled out of the body into a microfluidic device that contains the engineered bacteria to trap viruses.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Li:2018:GGS, author = "Juntao Li and Wenpeng Dong and Deyuan Meng", title = "Grouped Gene Selection of Cancer via Adaptive Sparse Group Lasso Based on Conditional Mutual Information", journal = j-TCBB, volume = "15", number = "6", pages = "2028--2038", month = nov, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2761871", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 26 18:59:16 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This paper deals with the problems of cancer classification and grouped gene selection. The weighted gene co-expression network on cancer microarray data is employed to identify modules corresponding to biological pathways, based on which a strategy of dividing genes into groups is presented. Using the conditional mutual information within each divided group, an integrated criterion is proposed and the data-driven weights are constructed. They are shown with the ability to evaluate both the individual gene significance and the influence to improve correlation of all the other pairwise genes in each group. Furthermore, an adaptive sparse group lasso is proposed, by which an improved blockwise descent algorithm is developed. 
The results on four cancer data sets demonstrate that the proposed adaptive sparse group lasso can effectively perform classification and grouped gene selection.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wu:2018:NCG, author = "Ho-Chun Wu and Xi-Guang Wei and Shing-Chow Chan", title = "Novel Consensus Gene Selection Criteria for Distributed {GPU} Partial Least Squares-Based Gene Microarray Analysis in Diffused {Large B Cell Lymphoma DLBCL} and Related Findings", journal = j-TCBB, volume = "15", number = "6", pages = "2039--2052", month = nov, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2760827", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 26 18:59:16 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This paper proposes a novel consensus gene selection criteria for partial least squares-based gene microarray analysis. By quantifying the extent of consistency and distinctiveness of the differential gene expressions across different double cross validations CV or randomizations in terms of occurrence and randomization p-values, the proposed criteria are able to identify a more comprehensive genes associated with the underlying disease. A Distributed GPU implementation has been proposed to accelerate the gene selection problem and about 8-11 times speed up has been achieved based on the microarray datasets considered. Simulation results using various cancer gene microarray datasets show that the proposed approach is able to achieve highly comparable classification accuracy in comparing with many conventional approaches. 
Furthermore, enrichment analysis on the selected genes for Diffused Large B Cell Lymphoma DLBCL and Prostate Cancer datasets and show that only the proposed approach is able to identify gene lists enriched in different pathways with significant p-values. In contrast, sufficient statistical significance cannot be found for conventional SVM-RFE and the t-test. The reliability in identifying and establishing statistical significance of the gene findings makes the proposed approach an attractive alternative for cancer related researches based on gene expression profiling or other similar data.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Biswas:2018:BOR, author = "Surama Biswas and Sriyankar Acharyya", title = "A Bi-Objective {RNN} Model to Reconstruct Gene Regulatory Network: a Modified Multi-Objective Simulated Annealing Approach", journal = j-TCBB, volume = "15", number = "6", pages = "2053--2059", month = nov, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2771360", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 26 18:59:16 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Gene Regulatory Network GRN is a virtual network in a cellular context of an organism, comprising a set of genes and their internal relationships to regulate protein production rate gene expression level of each other through coded proteins. Computational Reconstruction of GRN from gene expression data is a widely-applied research area. Recurrent Neural Network RNN is a useful modeling scheme for GRN reconstruction. In this research, the RNN formulation of GRN reconstruction having single objective function has been modified to incorporate a new objective function. 
An existing multi-objective meta-heuristic algorithm, called Archived Multi Objective Simulated Annealing (AMOSA), has been modified and applied to this bi-objective RNN formulation. Executing the resulting algorithm called AMOSA-GRN on a gene expression dataset, a collection termed as Archive of non-dominated GRNs has been obtained. Ensemble averaging has been applied on the archives, and obtained through a sequence of executions of AMOSA-GRN. Accuracy of GRNs in the averaged archive, with respect to gold standard GRN, varies in the range 0.875--1.0 (87.5--100 percent).", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Djeddi:2018:NCA, author = "Warith Eddine Djeddi and Sadok {Ben Yahia} and Engelbert Mephu Nguifo", title = "A Novel Computational Approach for Global Alignment for Multiple Biological Networks", journal = j-TCBB, volume = "15", number = "6", pages = "2060--2066", month = nov, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2808529", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 26 18:59:16 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Due to the rapid progress of biological networks for modeling biological systems, a lot of biomolecular networks have been producing more and more protein-protein interaction (PPI) data. Analyzing protein-protein interaction networks aims to find regions of topological and functional dissimilarities between molecular networks of different species. The study of PPI networks has the potential to teach us as much about life process and diseases at the molecular level. Although few methods have been developed for multiple PPI network alignment and thus, new network alignment methods are of a compelling need.
In this paper, we propose a novel algorithm for a global alignment of multiple protein-protein interaction networks called MAPPIN. The latter relies on information available for the proteins in the networks, such as sequence, function, and network topology. Our algorithm is perfectly designed to exploit current multi-core CPU architectures, and has been extensively tested on a real data eight species. Our experimental results show that MAPPIN significantly outperforms NetCoffee in terms of coverage. Nevertheless, MAPPIN is handicapped by the time required to load the gene annotation file. An extensive comparison versus the pioneering PPI methods also show that MAPPIN is often efficient in terms of coverage, mean entropy, or mean normalized.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Frith:2018:SDC, author = "Martin C. Frith and Anish M. S. Shrestha", title = "A Simplified Description of Child Tables for Sequence Similarity Search", journal = j-TCBB, volume = "15", number = "6", pages = "2067--2073", month = nov, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2796064", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 26 18:59:16 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Finding related nucleotide or protein sequences is a fundamental, diverse, and incompletely-solved problem in bioinformatics. It is often tackled by seed-and-extend methods, which first find ``seed'' matches of diverse types, such as spaced seeds, subset seeds, or minimizers. Seeds are usually found using an index of the reference sequences, which stores seed positions in a suffix array or related data structure. A child table is a fundamental way to achieve fast lookup in an index, but previous descriptions have been overly complex. 
This paper aims to provide a more accessible description of child tables, and demonstrate their generality: they apply equally to all the above-mentioned seed types and more. We also show that child tables can be used without LCP longest common prefix tables, reducing the memory requirement.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhang:2018:AIP, author = "Xizhe Zhang", title = "Altering Indispensable Proteins in Controlling Directed Human Protein Interaction Network", journal = j-TCBB, volume = "15", number = "6", pages = "2074--2078", month = nov, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2796572", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 26 18:59:16 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The numerous interconnections within complex systems enable us to control networks towards a desired state through a few suitable selected nodes, which are called driver nodes. Recent works analyzed directed human Protein-Protein Interaction PPI network based on structural control theory. They found that indispensable proteins, whose removal increase the number of driver nodes, are the primary targets of human viruses and drugs. However, the human PPI network is usually incomplete and may include many false-positive or false-negative interactions. That prompts us to ask whether these indispensable proteins are stable to possible structural changes. Here, we present a method to alter the type of indispensable proteins and thereby investigate the stability of indispensable proteins. By comparing the sets of indispensable proteins before and after structural changes to the network, we find that very few added or removed interactions can change the type of many indispensable nodes. 
Furthermore, some indispensable proteins are very sensitive to structural changes and have significantly lower interactions than the other indispensable proteins. The results indicate that indispensable proteins are sensitive to structural changes. Therefore, approaches based on structural control theory should be used with caution because of the incomplete nature of these networks.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Xu:2018:IGN, author = "Ting Xu and Le Ou-Yang and Xiaohua Hu and Xiao-Fei Zhang", title = "Identifying Gene Network Rewiring by Integrating Gene Expression and Gene Network Data", journal = j-TCBB, volume = "15", number = "6", pages = "2079--2085", month = nov, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2809603", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 26 18:59:16 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Exploring the rewiring pattern of gene regulatory networks between different pathological states is an important task in bioinformatics. Although a number of computational approaches have been developed to infer differential networks from high-throughput data, most of them only focus on gene expression data. The valuable static gene regulatory network data accumulated in recent biomedical researches are neglected. In this study, we propose a new Gaussian graphical model-based method to infer differential networks by integrating gene expression and static gene regulatory network data. We first evaluate the empirical performance of our method by comparing with the state-of-the-art methods using simulation data. 
We also apply our method to The Cancer Genome Atlas data to identify gene network rewiring between ovarian cancers with different platinum responses, and rewiring between breast cancers of luminal A subtype and basal-like subtype. Hub genes in the estimated differential networks rediscover known genes associated with platinum resistance in ovarian cancer and signatures of the breast cancer intrinsic subtypes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Jiao:2018:RGC, author = "Hongmei Jiao and Liping Zhang and Qikun Shen and Junwu Zhu and Peng Shi", title = "Robust Gene Circuit Control Design for Time-Delayed Genetic Regulatory Networks Without {SUM} Regulatory Logic", journal = j-TCBB, volume = "15", number = "6", pages = "2086--2093", month = nov, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2825445", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 26 18:59:16 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This paper investigates the gene circuit control design problem of time-delayed genetic regulatory networks. In the genetic regulatory networks, the time delays are unknown constants, and the genetic regulatory is not conventional SUM regulatory logic and can be modeled to be an unknown nonlinear function of the time-delayed states of the other genes in a cell. By Lyapunov stability, a novel adaptive gene circuit control design approach is proposed for the genetic regulatory networks, where the unknown time delays are estimated online by adaptive algorithms and the unknown regulatory functions are approximated by neural networks. The design approach in this paper is delay-dependent and has less conservatism than the delay-independent approach. 
From theoretical analysis, the closed-loop system is asymptotically stable and all the signals in the system converge to an adjustable neighborhood of the origin. Finally, a numerical example is given to show the effectiveness of the new design approach.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Luhmann:2018:SAC, author = "Nina Luhmann and Cedric Chauve and Jens Stoye and Roland Wittler", title = "Scaffolding of Ancient Contigs and Ancestral Reconstruction in a Phylogenetic Framework", journal = j-TCBB, volume = "15", number = "6", pages = "2094--2100", month = nov, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2816034", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Dec 26 18:59:16 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Ancestral genome reconstruction is an important task to analyze the evolution of genomes. Recent progress in sequencing ancient DNA led to the publication of so-called paleogenomes and allows the integration of this sequencing data in genome evolution analysis. However, the de novo assembly of ancient genomes is usually fragmented due to DNA degradation over time among others. Integrated phylogenetic assembly addresses the issue of genome fragmentation in the ancient DNA assembly while aiming to improve the reconstruction of all ancient genomes in the phylogeny simultaneously. The fragmented assembly of the ancient genome can be represented as an assembly graph, indicating contradicting ordering information of contigs. In this setting, our approach is to compare the ancient data with extant finished genomes. 
We generalize a reconstruction approach minimizing the Single-Cut-or-Join rearrangement distance towards multifurcating trees and include edge lengths to improve the reconstruction in practice. This results in a polynomial time algorithm that includes additional ancient DNA data at one node in the tree, resulting in consistent reconstructions of ancestral genomes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Yamanishi:2019:GEA, author = "Yoshihiro Yamanishi and Yasubumi Sakakibara and Yi-Ping Phoebe Chen", title = "Guest Editorial for the {16th Asia Pacific Bioinformatics Conference}", journal = j-TCBB, volume = "16", number = "1", pages = "1--2", month = jan, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2856940", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 11 18:45:00 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The eight papers in this special section were presented at the 16th Asia Pacific Bioinformatics Conference APBC2018, which was held in Yokohama, Japan, 15-17 January 2018. 
The aim of this conference is to provide an international forum for researchers, professionals, and industrial practitioners to share their knowledge and ideas of how to surf the tidal wave of information in the area of bioinformatics and computational biology.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Kimura:2019:PCB, author = "Kouichi Kimura and Asako Koike", title = "Parallel Computation of the {Burrows--Wheeler} Transform of Short Reads Using Prefix Parallelism", journal = j-TCBB, volume = "16", number = "1", pages = "3--13", month = jan, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2837749", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 11 18:45:00 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The Burrows--Wheeler transform BWT of short-read data has unexplored potential utilities, such as for efficient and sensitive variation analysis against multiple reference genome sequences, because it does not depend on any particular reference genome sequence, unlike conventional mapping-based methods. However, since the amount of read data is generally much larger than the size of the reference sequence, computation of the BWT of reads is not easy, and this hampers development of potential applications. For the alleviation of this problem, a new method of computing the BWT of reads in parallel is proposed. The BWT, corresponding to a sorted list of suffixes of reads, is constructed incrementally by successively including longer and longer suffixes. The working data is divided into more than 10,000 ``blocks'' corresponding to sublists of suffixes with the same prefixes. Thousands of groups of blocks can be processed in parallel while making exclusive writes and concurrent reads into a shared memory. 
Reads and writes are basically sequential, and the read concurrency is limited to two. Thus, a fine-grained parallelism, referred to as prefix parallelism, is expected to work efficiently. The time complexity for processing $n$ reads of length $ \ell $ is $ O(n \ell^2) $. On actual biological DNA sequence data of about 100 Gbp with a read length of 100 bp (base pairs), a tentative implementation of the proposed method took less than an hour on a single-node computer; i.e., it was about three times faster than one of the fastest programs developed so far.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Haack:2019:CDS, author = "Jordan Haack and Eli Zupke and Andrew Ramirez and Yi-Chieh Wu and Ran Libeskind-Hadas", title = "Computing the Diameter of the Space of Maximum Parsimony Reconciliations in the Duplication--Transfer--Loss Model", journal = j-TCBB, volume = "16", number = "1", pages = "14--22", month = jan, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2849732", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 11 18:45:00 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Phylogenetic tree reconciliation is widely used in the fields of molecular evolution, cophylogenetics, parasitology, and biogeography to study the evolutionary histories of pairs of entities. In these contexts, reconciliation is often performed using maximum parsimony under the Duplication-Transfer-Loss (DTL) event model. In general, the number of maximum parsimony reconciliations (MPRs) can grow exponentially with the size of the trees. While a number of previous efforts have been made to count the number of MPRs, find representative MPRs, and compute the frequencies of events across the space of MPRs, little is known about the structure of MPR space.
In particular, how different are MPRs in terms of the events that they comprise? One way to address this question is to compute the diameter of MPR space, defined to be the maximum number of DTL events that distinguish any two MPRs in the solution space. We show how to compute the diameter of MPR space in polynomial time and then apply this algorithm to a large biological dataset to study the variability of events.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Rizzi:2019:HCA, author = "Romeo Rizzi and Massimo Cairo and Veli M{\"a}kinen and Alexandru I. Tomescu and Daniel Valenzuela", title = "Hardness of Covering Alignment: Phase Transition in Post-Sequence Genomics", journal = j-TCBB, volume = "16", number = "1", pages = "23--30", month = jan, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2831691", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 11 18:45:00 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Covering alignment problems arise from recent developments in genomics; so called pan-genome graphs are replacing reference genomes, and advances in haplotyping enable full content of diploid genomes to be used as basis of sequence analysis. In this paper, we show that the computational complexity will change for natural extensions of alignments to pan-genome representations and to diploid genomes. More broadly, our approach can also be seen as a minimal extension of sequence alignment to labelled directed acyclic graphs (labeled DAGs). Namely, we show that finding a covering alignment of two labeled DAGs is NP-hard even on binary alphabets.
A covering alignment asks for two paths $ R_1 $ (red) and $ G_1 $ (green) in DAG $ D_1 $ and two paths $ R_2 $ (red) and $ G_2 $ (green) in DAG $ D_2 $ that cover the nodes of the graphs and maximize the sum of the global alignment scores: $ \mathsf {as}(\mathsf {sp}(R_1), \mathsf {sp}(R_2)) + \mathsf {as}(\mathsf {sp}(G_1), \mathsf {sp}(G_2)) $, where $ \mathsf {sp}(P) $ is the concatenation of labels on the path $P$. Pair-wise alignment of haplotype sequences forming a diploid chromosome can be converted to a two-path coverable labelled DAG, and then the covering alignment models the similarity of two diploids over arbitrary recombinations. We also give a reduction to the other direction, to show that such a recombination-oblivious diploid alignment is NP-hard on alphabets of size 3.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Mizera:2019:TAA, author = "Andrzej Mizera and Jun Pang and Hongyang Qu and Qixia Yuan", title = "Taming Asynchrony for Attractor Detection in Large {Boolean} Networks", journal = j-TCBB, volume = "16", number = "1", pages = "31--42", month = jan, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2850901", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 11 18:45:00 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Boolean networks is a well-established formalism for modelling biological systems. A vital challenge for analyzing a Boolean network is to identify all the attractors. This becomes more challenging for large asynchronous Boolean networks, due to the asynchronous scheme. Existing methods are prohibited due to the well-known state-space explosion problem in large Boolean networks. In this paper, we tackle this challenge by proposing a SCC-based decomposition method.
We prove the correctness of our proposed method and demonstrate its efficiency with two real-life biological networks.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wang:2019:AIR, author = "Tiancong Wang and Bin Ma", title = "Adjacent {Y}-Ion Ratio Distributions and Its Application in Peptide Sequencing", journal = j-TCBB, volume = "16", number = "1", pages = "43--51", month = jan, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2864647", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 11 18:45:00 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A scoring function plays a critical role in software for peptide identification with mass spectrometry. We present a general scoring feature that can be incorporated in the scoring functions of other peptide identification software. The scoring feature is based on the intensity ratios between two adjacent y-ions in the spectrum. A method is proposed to obtain the probability distributions of such ratios, and to calculate the scoring feature based on the distributions. 
To demonstrate the performance of the method, the new feature is incorporated with X!Tandem [1], [2] and Novor [3] and significantly improved the database search and de novo sequencing performances on the testing data, respectively.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Hartmann:2019:EAS, author = "Tom Hartmann and Matthias Bernt and Martin Middendorf", title = "An Exact Algorithm for Sorting by Weighted Preserving Genome Rearrangements", journal = j-TCBB, volume = "16", number = "1", pages = "52--62", month = jan, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2831661", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 11 18:45:00 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The preserving Genome Sorting Problem (pGSP) asks for a shortest sequence of rearrangement operations that transforms a given gene order into another given gene order by using rearrangement operations that preserve common intervals, i.e., groups of genes that form an interval in both given gene orders. The wpGSP is the weighted version of the problem where each type of rearrangement operation has a weight and a minimum weight sequence of rearrangement operations is sought. An exact algorithm --- called CREx2 --- is presented, which solves the wpGSP for arbitrary gene orders and the following types of rearrangement operations: inversions, transpositions, inverse transpositions, and tandem duplication random loss operations. CREx2 has a worst case exponential runtime, but a linear runtime for problem instances where the common intervals are organized in a linear structure. 
The efficiency of CREx2 and its usefulness for phylogenetic analysis is shown empirically for gene orders of fungal mitochondrial genomes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Li:2019:IRF, author = "Lei Li and Mukul S. Bansal", title = "An Integrated Reconciliation Framework for Domain, Gene, and Species Level Evolution", journal = j-TCBB, volume = "16", number = "1", pages = "63--76", month = jan, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2846253", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 11 18:45:00 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The majority of genes in eukaryotes consists of one or more protein domains that can be independently lost or gained during evolution. This gain and loss of protein domains, through domain duplications, transfers, or losses, has important evolutionary and functional consequences. Yet, even though it is well understood that domains evolve inside genes and genes inside species, there do not exist any computational frameworks to simultaneously model the evolution of domains, genes, and species and account for their inter-dependency. Here, we develop an integrated model of domain evolution that explicitly captures the interdependence of domain-, gene-, and species-level evolution. Our model extends the classical phylogenetic reconciliation framework, which infers gene family evolution by comparing gene trees and species trees, by explicitly considering domain-level evolution and decoupling domain-level events from gene-level events. 
In this paper, we (i) introduce the new integrated reconciliation framework, (ii) prove that the associated optimization problem is NP-hard, (iii) devise an efficient heuristic solution for the problem, (iv) apply our algorithm to a large biological dataset, and (v) demonstrate the impact of using our new computational framework compared to existing approaches. The implemented software is freely available from http://compbio.engr.uconn.edu/software/seadog/.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Nishiyama:2019:RCN, author = "Yuhei Nishiyama and Aleksandar Shurbevski and Hiroshi Nagamochi and Tatsuya Akutsu", title = "Resource Cut, a New Bounding Procedure to Algorithms for Enumerating Tree-Like Chemical Graphs", journal = j-TCBB, volume = "16", number = "1", pages = "77--90", month = jan, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2832061", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 11 18:45:00 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Enumerating chemical compounds with given structural properties plays an important role in structure elucidation, with applications such as drug design. We focus on the problem of enumerating tree-like chemical graphs specified by upper and lower bounds on feature vectors, where chemical graphs represent compounds, and a feature vector characterizes frequencies of finite paths in a graph. Building on the branch-and-bound algorithm proposed in earlier work, we propose a new bounding procedure, called Resource Cut, to speed up the enumeration process. Tree-like chemical graphs are modeled as vertex-colored trees, colors representing chemical elements. The algorithm is based on a scheme of generating each unique colored tree with a specified number $n$ of vertices. 
A colored tree is constructed by repeatedly appending vertices. Given a set $ \mathcal {R}$ of $n$ colored vertices, we found that the algorithm often constructs trees that cannot be extended to a unique representation of a colored tree no matter how the remaining unused colored vertices in the set $ \mathcal {R}$ are appended. We derive a mathematical condition to detect and discard such trees. Experimental results show that Resource Cut significantly reduces the search space. We have been able to obtain exact numbers of chemical graphs with up to 17 vertices excluding hydrogen atoms.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Shen:2019:MSS, author = "Bairong Shen and Xiaoqian Jiang and Xingming Zhao", title = "Modeling and Simulation Studies of Complex Biological Systems for Precision Medicine and Healthcare", journal = j-TCBB, volume = "16", number = "1", pages = "91--92", month = jan, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2850078", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 11 18:45:00 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The papers in this special section focus on modeling and simulation research of complex biological systems that are forged by precision medicine and healthcare. Recently, big data driven precision medicine has become one of the frontiers in biomedical study, but the complex diseases caused by interactions between genes, environments, and lifestyles are still difficult to be understood by traditional methods. Although we have more and more high-throughput molecular data measured and accumulated, we are still lacking fine and personalized clinical phenotype data. 
There is a long way to go from data to precision medicine/healthcare, since the biomedical process is dynamic, evolutionary, and systematic. It is a big challenge to make these big data useful to the precision prognosis, diagnosis, and treatment of complex disease. Modeling and simulation will be an essential and important method to the investigation of the mechanisms and dynamic evolution of complex diseases even with big data available. The prevention and the early diagnosis of complex diseases will be essential to the coming aging society. The shifting from clinical management to precision healthcare will be also the next challenge for scientific researches.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Sadat:2019:SSG, author = "Md Nazmus Sadat and Md Momin {Al Aziz} and Noman Mohammed and Feng Chen and Xiaoqian Jiang and Shuang Wang", title = "{SAFETY: Secure gwAs in Federated Environment through a hYbrid Solution}", journal = j-TCBB, volume = "16", number = "1", pages = "93--102", month = jan, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2829760", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 11 18:45:00 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Recent studies demonstrate that effective healthcare can benefit from using the human genomic information. Consequently, many institutions are using statistical analysis of genomic data, which are mostly based on genome-wide association studies (GWAS). GWAS analyze genome sequence variations in order to identify genetic risk factors for diseases. 
These studies often require pooling data from different sources together in order to unravel statistical patterns, and relationships between genetic variants and diseases. Here, the primary challenge is to fulfill one major objective: accessing multiple genomic data repositories for collaborative research in a privacy-preserving manner. Due to the privacy concerns regarding the genomic data, multi-jurisdictional laws and policies of cross-border genomic data sharing are enforced among different countries. In this article, we present SAFETY, a hybrid framework, which can securely perform GWAS on federated genomic datasets using homomorphic encryption and recently introduced secure hardware component of Intel Software Guard Extensions to ensure high efficiency and privacy at the same time. Different experimental settings show the efficacy and applicability of such hybrid framework in secure conduction of GWAS. To the best of our knowledge, this hybrid use of homomorphic encryption along with Intel SGX is not proposed to this date. SAFETY is up to 4.82 times faster than the best existing secure computation technique.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Mao:2019:MEP, author = "Chengsheng Mao and Yuan Zhao and Mengxin Sun and Yuan Luo", title = "Are My {EHRs} Private Enough? {Event}-Level Privacy Protection", journal = j-TCBB, volume = "16", number = "1", pages = "103--112", month = jan, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2850037", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 11 18:45:00 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Privacy is a major concern in sharing human subject data to researchers for secondary analyses. 
A simple binary consent (opt-in or not) may significantly reduce the amount of sharable data, since many patients might only be concerned about a few sensitive medical conditions rather than the entire medical records. We propose event-level privacy protection, and develop a feature ablation method to protect event-level privacy in electronic medical records. Using a list of 13 sensitive diagnoses, we evaluate the feasibility and the efficacy of the proposed method. As feature ablation progresses, the identifiability of a sensitive medical condition decreases with varying speeds on different diseases. We find that these sensitive diagnoses can be divided into three categories: (1) five diseases have fast declining identifiability (AUC below 0.6 with less than 400 features excluded); (2) seven diseases with progressively declining identifiability (AUC below 0.7 with between 200 and 700 features excluded); and (3) one disease with slowly declining identifiability (AUC above 0.7 with 1,000 features excluded). 
The fact that the majority (12 out of 13) of the sensitive diseases fall into the first two categories suggests the potential of the proposed feature ablation method as a solution for event-level record privacy protection.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Jiang:2019:SSL, author = "Yichen Jiang and Jenny Hamer and Chenghong Wang and Xiaoqian Jiang and Miran Kim and Yongsoo Song and Yuhou Xia and Noman Mohammed and Md Nazmus Sadat and Shuang Wang", title = "{SecureLR}: Secure Logistic Regression Model via a Hybrid Cryptographic Protocol", journal = j-TCBB, volume = "16", number = "1", pages = "113--123", month = jan, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2833463", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 11 18:45:00 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Machine learning applications are intensively utilized in various science fields, and increasingly the biomedical and healthcare sector. Applying predictive modeling to biomedical data introduces privacy and security concerns requiring additional protection to prevent accidental disclosure or leakage of sensitive patient information. Significant advancements in secure computing methods have emerged in recent years, however, many of which require substantial computational and/or communication overheads, which might hinder their adoption in biomedical applications. In this work, we propose SecureLR, a novel framework allowing researchers to leverage both the computational and storage capacity of Public Cloud Servers to conduct learning and predictions on biomedical data without compromising data security or efficiency. 
Our model builds upon homomorphic encryption methodologies with hardware-based security reinforcement through Software Guard Extensions (SGX), and our implementation demonstrates a practical hybrid cryptographic solution to address important concerns in conducting machine learning with public clouds.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Sun:2019:EMM, author = "Mengmeng Sun and Tao Ding and Xu-Qing Tang and Keming Yu", title = "An Efficient Mixed-Model for Screening Differentially Expressed Genes of Breast Cancer Based on {LR--RF}", journal = j-TCBB, volume = "16", number = "1", pages = "124--130", month = jan, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2829519", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 11 18:45:00 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "To screen differentially expressed genes quickly and efficiently in breast cancer, two gene microarray datasets of breast cancer, GSE15852 and GSE45255, were downloaded from GEO. By combining the Logistic Regression and Random Forest algorithm, this paper proposed a novel method named LR-RF to select differentially expressed genes of breast cancer on microarray data by the Bonferroni test of FWER error measure. Comparing with Logistic Regression and Random Forest, our study shows that LR-RF has a great facility in selecting differentially expressed genes. The average prediction accuracy of the proposed LR-RF from replicating random test 10 times surprisingly reaches $ {{93.11}} $ percent with variance as low as $ {{0.00045}} $. 
The prediction accuracy rate reaches a maximum 95.57 percent when threshold value $ \alpha = 0.2 $ in the random forest algorithm process of ranking genes' importance score, and the differentially expressed genes are relatively few in number. In addition, through analyzing the gene interaction networks, most of the top 20 genes we selected were found to involve in the development of breast cancer. All of these results demonstrate the reliability and efficiency of LR-RF. It is anticipated that LR-RF would provide new knowledge and method for biologists, medical scientists, and cognitive computing researchers to identify disease-related genes of breast cancer.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhu:2019:ARC, author = "Wenliang Zhu and Xiaohe Chen and Yan Wang and Lirong Wang", title = "Arrhythmia Recognition and Classification Using {ECG} Morphology and Segment Feature Analysis", journal = j-TCBB, volume = "16", number = "1", pages = "131--138", month = jan, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2846611", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 11 18:45:00 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In this work, arrhythmia appearing with the presence of abnormal heart electrical activity is efficiently recognized and classified. A novel method is proposed for accurate recognition and classification of cardiac arrhythmias. Firstly, P-QRS-T waves is segmented from ECG waveform; secondly, morphological features are extracted from P-QRS-T waves, and ECG segment features are extracted from the selected ECG segment by using PCA and dynamic time warping (DTW); finally, SVM is applied to the features and automatic diagnosis results is presented. 
ECG data set used is derived from the MIT-BIH in which ECG signals are divided into the four classes: normal beats (N), supraventricular ectopic beats (SVEBs), ventricular ectopic beats (VEBs) and fusion of ventricular and normal (F). Our proposed method can distinguish N, SVEBs, VEBs and F with an accuracy of 97.80 percent. The sensitivities for the classes N, SVEBs, VEBs and F are 99.27, 87.47, 94.71, and 73.88 percent and the positive predictivities are 98.48, 95.25, 95.22 and 86.09 percent respectively. The detection sensitivity of SVEBs and VEBs has a better performance by combining proposed features than by using the ECG morphology or ECG segment features separately. The proposed method is compared with four selected peer algorithms and delivers solid results.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zeng:2019:NLP, author = "Zexian Zeng and Yu Deng and Xiaoyu Li and Tristan Naumann and Yuan Luo", title = "Natural Language Processing for {EHR}-Based Computational Phenotyping", journal = j-TCBB, volume = "16", number = "1", pages = "139--153", month = jan, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2849968", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 11 18:45:00 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This article reviews recent advances in applying natural language processing (NLP) to Electronic Health Records (EHRs) for computational phenotyping. NLP-based computational phenotyping has numerous applications including diagnosis categorization, novel phenotype discovery, clinical trial screening, pharmacogenomics, drug-drug interaction (DDI), and adverse drug event (ADE) detection, as well as genome-wide and phenome-wide association studies. 
Significant progress has been made in algorithm development and resource construction for computational phenotyping. Among the surveyed methods, well-designed keyword search and rule-based systems often achieve good performance. However, the construction of keyword and rule lists requires significant manual effort, which is difficult to scale. Supervised machine learning models have been favored because they are capable of acquiring both classification patterns and structures from data. Recently, deep learning and unsupervised learning have received growing attention, with the former favored for its performance and the latter for its ability to find novel phenotypes. Integrating heterogeneous data sources have become increasingly important and have shown promise in improving model performance. Often, better performance is achieved by combining multiple modalities of information. Despite these many advances, challenges and opportunities remain for NLP-based computational phenotyping, including better model interpretability and generalizability, and proper characterization of feature relations in clinical narratives.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wang:2019:DDR, author = "Yin-Ying Wang and Chunfeng Cui and Liqun Qi and Hong Yan and Xing-Ming Zhao", title = "{DrPOCS}: Drug Repositioning Based on Projection Onto Convex Sets", journal = j-TCBB, volume = "16", number = "1", pages = "154--162", month = jan, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2830384", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 11 18:45:00 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Drug repositioning, i.e., identifying new indications for known drugs, has attracted a lot of attentions recently and is becoming an effective 
strategy in drug development. In literature, several computational approaches have been proposed to identify potential indications of old drugs based on various types of data sources. In this paper, by formulating the drug-disease associations as a low-rank matrix, we propose a novel method, namely DrPOCS, to identify candidate indications of old drugs based on projection onto convex sets (POCS). With the integration of drug structure and disease phenotype information, DrPOCS predicts potential associations between drugs and diseases with matrix completion. Benchmarking results demonstrate that our proposed approach outperforms popular existing approaches with high accuracy. In addition, a number of novel predicted indications are validated with various types of evidences, indicating the predictive power of our proposed approach.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ding:2019:CSN, author = "Dewu Ding and Xiao Sun", title = "A Comparative Study of Network Motifs in the Integrated Transcriptional Regulation and Protein Interaction Networks of \bioname{Shewanella}", journal = j-TCBB, volume = "16", number = "1", pages = "163--171", month = jan, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2804393", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 11 18:45:00 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The Shewanella species shows a remarkable respiratory versatility with a great variety of extracellular electron acceptors (termed Extracellular Electron Transfer, EET). 
To explore relevant mechanisms from the network motif view, we constructed the integrated networks that combined transcriptional regulation interactions (TRIs) and protein-protein interactions (PPIs) for 13 Shewanella species, identified and compared the network motifs in these integrated networks. We found that the network motifs were evolutionary conserved in these integrated networks. The functional significance of the highly conserved motifs was discussed, especially the important ones that were potentially involved in the Shewanella EET processes. More importantly, we found that: (1) the motif co-regulated PPI took a role in the ``standby mode'' of protein utilization, which will be helpful for cells to rapidly response to environmental changes; and (2) the type II cofactors, which involved in the motif TRI interacting with a third protein, mainly carried out a signalling role in Shewanella oneidensis MR-1.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wang:2019:MNL, author = "Juan Wang and Jin-Xing Liu and Chun-Hou Zheng and Ya-Xuan Wang and Xiang-Zhen Kong and Chang-Gang Wen", title = "A Mixed-Norm {Laplacian} Regularized Low-Rank Representation Method for Tumor Samples Clustering", journal = j-TCBB, volume = "16", number = "1", pages = "172--182", month = jan, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2769647", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 11 18:45:00 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Tumor samples clustering based on biomolecular data is a hot issue of cancer classifications discovery. How to extract the valuable information from high dimensional genomic data is becoming an urgent problem in tumor samples clustering. 
In this paper, we introduce manifold regularization into low-rank representation model and present a novel method named Mixed-norm Laplacian regularized Low-Rank Representation (MLLRR) to identify the differentially expressed genes for tumor clustering based on gene expression data. Then, in order to advance the accuracy and stability of tumor clustering, we establish the clustering model based on Penalized Matrix Decomposition (PMD) and propose a novel cluster method named MLLRR-PMD. In this method, the cancer clustering research includes three steps. First, the matrix of gene expression data is decomposed into a low rank representation matrix and a sparse matrix by MLLRR. Second, the differentially expressed genes are identified based on the sparse matrix. Finally, the PMD is applied to cluster the samples based on the differentially expressed genes. The experiment results on simulation data and real genomic data illustrate that MLLRR method enhances the robustness to outliers and achieves remarkable performance in the extraction of differentially expressed genes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Paul:2019:IAI, author = "Sushmita Paul and Dhanajit Brahma", title = "An Integrated Approach for Identification of Functionally Similar {MicroRNAs} in Colorectal Cancer", journal = j-TCBB, volume = "16", number = "1", pages = "183--192", month = jan, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2765332", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 11 18:45:00 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Colorectal cancer (CRC) is one of the most prevalent cancers around the globe. However, the molecular reasons for pathogenesis of CRC are still poorly understood. 
Recently, the role of microRNAs or miRNAs in the initiation and progression of CRC has been studied. MicroRNAs are small, endogenous noncoding RNAs found in plants, animals, and some viruses, which function in RNA silencing and posttranscriptional regulation of gene expression. Their role in CRC development is studied and they are found to be potential biomarkers in diagnosis and treatment of CRC. Therefore, identification of functionally similar CRC related miRNAs may help in the development of a prognostic tool. In this regard, this paper presents a new algorithm, called $ \mu $Sim. It is an integrative approach for identification of functionally similar miRNAs associated with CRC. It integrates judiciously the information of miRNA expression data and miRNA-miRNA functionally synergistic network data. The functional similarity is calculated based on both miRNA expression data and miRNA-miRNA functionally synergistic network data. The effectiveness of the proposed method in comparison to other related methods is shown on four CRC miRNA data sets. The proposed method selected more significant miRNAs related to CRC as compared to other related methods.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Karbalayghareh:2019:CSC, author = "Alireza Karbalayghareh and Ulisses Braga-Neto and Edward R. 
Dougherty", title = "Classification of Single-Cell Gene Expression Trajectories from Incomplete and Noisy Data", journal = j-TCBB, volume = "16", number = "1", pages = "193--207", month = jan, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2763946", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 11 18:45:00 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "This paper studies classification of gene-expression trajectories coming from two classes, healthy and mutated (cancerous) using Boolean networks with perturbation (BNps) to model the dynamics of each class at the state level. Each class has its own BNp, which is partially known based on gene pathways. We employ a Gaussian model at the observation level to show the expression values of the genes given the hidden binary states at each time point. We use expectation maximization (EM) to learn the BNps and the unknown model parameters, derive closed-form updates for the parameters, and propose a learning algorithm. After learning, a plug-in Bayes classifier is used to classify unlabeled trajectories, which can have missing data. Measuring gene expressions at different times yields trajectories only when measurements come from a single cell. In multiple-cell scenarios, the expression values are averages over many cells with possibly different states. Via the central-limit theorem, we propose another model for expression data in multiple-cell scenarios. Simulations demonstrate that single-cell trajectory data can outperform multiple-cell average expression data relative to classification error, especially in high-noise situations. 
We also consider data generated via a mammalian cell-cycle network, both the wild-type and with a common mutation affecting p27.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Cheng:2019:CBC, author = "Kin-On Cheng and Ngai-Fong Law and Wan-Chi Siu", title = "Clustering-Based Compression for Population {DNA} Sequences", journal = j-TCBB, volume = "16", number = "1", pages = "208--221", month = jan, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2762302", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 11 18:45:00 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Due to the advancement of DNA sequencing techniques, the number of sequenced individual genomes has experienced an exponential growth. Thus, effective compression of this kind of sequences is highly desired. In this work, we present a novel compression algorithm called Reference-based Compression algorithm using the concept of Clustering (RCC). The rationale behind RCC is based on the observation about the existence of substructures within the population sequences. To utilize these substructures, $k$-means clustering is employed to partition sequences into clusters for better compression. A reference sequence is then constructed for each cluster so that sequences in that cluster can be compressed by referring to this reference sequence. The reference sequence of each cluster is also compressed with reference to a sequence which is derived from all the reference sequences. Experiments show that RCC can further reduce the compressed size by up to 91.0 percent when compared with state-of-the-art compression approaches. There is a compromise between compressed size and processing time. 
The current implementation in Matlab has time complexity in a factor of thousands higher than the existing algorithms implemented in C/C++. Further investigation is required to improve processing time in future.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Luo:2019:DGP, author = "Ping Luo and Li-Ping Tian and Jishou Ruan and Fang-Xiang Wu", title = "Disease Gene Prediction by Integrating {PPI} Networks, Clinical {RNA}-Seq Data and {OMIM} Data", journal = j-TCBB, volume = "16", number = "1", pages = "222--232", month = jan, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2770120", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 11 18:45:00 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Disease gene prediction is a challenging task that has a variety of applications such as early diagnosis and drug development. The existing machine learning methods suffer from the imbalanced sample issue because the number of known disease genes (positive samples) is much less than that of unknown genes which are typically considered to be negative samples. In addition, most methods have not utilized clinical data from patients with a specific disease to predict disease genes. In this study, we propose a disease gene prediction algorithm called dgSeq by combining protein-protein interaction (PPI) network, clinical RNA-Seq data, and Online Mendelian Inheritance in Man (OMIM) data. Our dgSeq constructs differential networks based on rewiring information calculated from clinical RNA-Seq data. To select balanced sets of non-disease genes (negative samples), a disease-gene network is also constructed from OMIM data. After features are extracted from the PPI networks and differential networks, the logistic regression classifiers are trained.
Our dgSeq obtains AUC values of 0.88, 0.83, and 0.80 for identifying breast cancer genes, thyroid cancer genes, and Alzheimer's disease genes, respectively, which indicates its superiority to other three competing methods. Both gene set enrichment analysis and predicted results demonstrate that dgSeq can effectively predict new disease genes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Yan:2019:DMP, author = "Cheng Yan and Jianxin Wang and Peng Ni and Wei Lan and Fang-Xiang Wu and Yi Pan", title = "{DNRLMF--MDA}: Predicting {microRNA-Disease} Associations Based on Similarities of {microRNAs} and Diseases", journal = j-TCBB, volume = "16", number = "1", pages = "233--243", month = jan, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2776101", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 11 18:45:00 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "MicroRNAs (miRNAs) are a class of non-coding RNAs about $ \sim $22nt nucleotides. Studies have proven that miRNAs play key roles in many human complex diseases. Therefore, discovering miRNA-disease associations is beneficial to understanding disease mechanisms, developing drugs, and treating complex diseases. It is well known that it is a time-consuming and expensive process to discover the miRNA-disease associations via biological experiments. Alternatively, computational models could provide a low-cost and high-efficiency way for predicting miRNA-disease associations. In this study, we propose a method called DNRLMF-MDA to predict miRNA-disease associations based on dynamic neighborhood regularized logistic matrix factorization.
DNRLMF-MDA integrates known miRNA-disease associations, functional similarity and Gaussian Interaction Profile (GIP) kernel similarity of miRNAs, and functional similarity and GIP kernel similarity of diseases. Especially, positive observations (known miRNA-disease associations) are assigned higher importance levels than negative observations (unknown miRNA-disease associations). DNRLMF-MDA computes the probability that a miRNA would interact with a disease by a logistic matrix factorization method, where latent vectors of miRNAs and diseases represent the properties of miRNAs and diseases, respectively, and further improve prediction performance via dynamic neighborhood regularized. The 5-fold cross validation is adopted to assess the performance of our DNRLMF-MDA, as well as other competing methods for comparison. The computational experiments show that DNRLMF-MDA outperforms the state-of-art method PBMDA. The AUC values of DNRLMF-MDA on three datasets are 0.9357, 0.9411, and 0.9416, respectively, which are superior to the PBMDA's results of 0.9218, 0.9187, and 0.9262. The average computation times per 5-fold cross validation of DNRLMF-MDA on three datasets are 38, 46, and 50 seconds, which are shorter than the PBMDA's average computation times of 10869, 916, and 8448 seconds, respectively. DNRLMF-MDA also can predict potential diseases for new miRNAs.
Furthermore, case studies illustrate that DNRLMF-MDA is an effective method to predict miRNA-disease associations.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ju:2019:EDA, author = "Ronghui Ju and Chenhui Hu and Pan Zhou and Quanzheng Li", title = "Early Diagnosis of {Alzheimer}'s Disease Based on Resting-State Brain Networks and Deep Learning", journal = j-TCBB, volume = "16", number = "1", pages = "244--257", month = jan, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2776910", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 11 18:45:00 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Computerized healthcare has undergone rapid development thanks to the advances in medical imaging and machine learning technologies. Especially, recent progress on deep learning opens a new era for multimedia based clinical decision support. In this paper, we use deep learning with brain network and clinical relevant text information to make early diagnosis of Alzheimer's Disease (AD). The clinical relevant text information includes age, gender, and {\em ApoE\/} gene of the subject. The brain network is constructed by computing the functional connectivity of brain regions using resting-state functional magnetic resonance imaging (R-fMRI) data. A targeted autoencoder network is built to distinguish normal aging from mild cognitive impairment, an early stage of AD. The proposed method reveals discriminative brain network features effectively and provides a reliable classifier for AD detection.
Compared to traditional classifiers based on R-fMRI time series data, about 31.21 percent improvement of the prediction accuracy is achieved by the proposed deep learning method, and the standard deviation reduces by 51.23 percent in the best case that means our prediction model is more stable and reliable compared to the traditional methods. Our work excavates deep learning's advantages of classifying high-dimensional multimedia data in medical services, and could help predict and prevent AD at an early stage.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Pesantez-Cabrera:2019:EDC, author = "Paola Pesantez-Cabrera and Ananth Kalyanaraman", title = "Efficient Detection of Communities in Biological Bipartite Networks", journal = j-TCBB, volume = "16", number = "1", pages = "258--271", month = jan, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2765319", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 11 18:45:00 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Methods to efficiently uncover and extract community structures are required in a number of biological applications where networked data and their interactions can be modeled as graphs, and observing tightly-knit groups of vertices (``communities'') can offer insights into the structural and functional building blocks of the underlying network. Classical applications of community detection have largely focused on unipartite networks --- i.e., graphs built out of a single type of objects. However, due to increased availability of biological data from various sources, there is now an increasing need for handling heterogeneous networks which are built out of multiple types of objects.
In this paper, we address the problem of identifying communities from biological bipartite networks --- i.e., networks where interactions are observed between two different types of objects (e.g., genes and diseases, drugs and protein complexes, plants and pollinators, and hosts and pathogens). Toward detecting communities in such bipartite networks, we make the following contributions: (i) (metric) we propose a variant of bipartite modularity; (ii) (algorithms) we present an efficient algorithm called biLouvain that implements a set of heuristics toward fast and precise community detection in bipartite networks (https://github.com/paolapesantez/biLouvain); and (iii) (experiments) we present a thorough experimental evaluation of our algorithm including comparison to other state-of-the-art methods to identify communities in bipartite networks. Experimental results show that our biLouvain algorithm identifies communities that have a comparable or better quality as measured by bipartite modularity than existing methods, while significantly reducing the time-to-solution between one and four orders of magnitude.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Li:2019:EGW, author = "Xiangtao Li and Ka-Chun Wong", title = "Elucidating Genome-Wide Protein-{RNA} Interactions Using Differential Evolution", journal = j-TCBB, volume = "16", number = "1", pages = "272--282", month = jan, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2776224", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 11 18:45:00 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "RNA-binding proteins (RBPs) play an important role in the post-transcriptional control of RNAs, such as splicing, polyadenylation, mRNA stabilization, mRNA localization, and translation.
Thanks to the recent breakthrough, non-negative matrix factorization (NMF) has been developed to combine multiple data sources to discover non-overlapping and class-specific RNA binding patterns. However, several challenges still exist in determining the number of latent dimensions in the factorization steps. In most circumstances, it is often assumed that the number of latent dimensions or components is given. Such trial-and-error procedures can be tedious in practice. In order to address this problem, differential evolution algorithm is proposed as the model selection method to choose the suitable number of ranks, which can adaptively decompose the input protein-RNA data matrix into different nonnegative components. Experimental results demonstrate that the proposed algorithms can improve the factorization quality over the recent state-of-the-arts. The effectiveness of the proposed algorithms are supported by comprehensive performance benchmarking on 31 genome-wide cross-linking immunoprecipitation (CLIP) coupled with high-throughput sequencing (CLIP-seq) datasets.
In addition, time complexity analysis and parameter analysis are conducted to demonstrate the robustness of the proposed methods.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhang:2019:MPM, author = "Xuan Zhang and Quan Zou and Alfonso Rodriguez-Paton and Xiangxiang Zeng", title = "Meta-Path Methods for Prioritizing Candidate Disease {miRNAs}", journal = j-TCBB, volume = "16", number = "1", pages = "283--291", month = jan, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2776280", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 11 18:45:00 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "MicroRNAs (miRNAs) play critical roles in regulating gene expression at post-transcriptional levels. Numerous experimental studies indicate that alterations and dysregulations in miRNAs are associated with important complex diseases, especially cancers. Predicting potential miRNA-disease association is beneficial not only to explore the pathogenesis of diseases, but also to understand biological processes. In this work, we propose two methods that can effectively predict potential miRNA-disease associations using our reconstructed miRNA and disease similarity networks, which are based on the latest experimental data. We reconstruct a miRNA functional similarity network using the following biological information: the miRNA family information, miRNA cluster information, experimentally valid miRNA-target association and disease-miRNA information. We also reconstruct a disease similarity network using disease functional information and disease semantic information. We present Katz with specific weights and Katz with machine learning, on the comprehensive heterogeneous network.
These methods, which achieve corresponding AUC values of 0.897 and 0.919, exhibit performance superior to the existing methods. Comprehensive data networks and reasonable considerations guarantee the high performance of our methods. Contrary to several methods, which cannot work in such situations, the proposed methods also predict associations for diseases without any known related miRNAs. A web service for the download and prediction of relationships between diseases and miRNAs is available at http://lab.malab.cn/soft/MDPredict/.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Liu:2019:PRH, author = "Bin Liu and Junjie Chen and Mingyue Guo and Xiaolong Wang", title = "Protein Remote Homology Detection and Fold Recognition Based on Sequence-Order Frequency Matrix", journal = j-TCBB, volume = "16", number = "1", pages = "292--300", month = jan, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2765331", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 11 18:45:00 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Protein remote homology detection and fold recognition are two critical tasks for the studies of protein structures and functions. Currently, the profile-based methods achieve the state-of-the-art performance in these fields. However, the widely used sequence profiles, like position-specific frequency matrix (PSFM) and position-specific scoring matrix (PSSM), ignore the sequence-order effects along protein sequence. In this study, we have proposed a novel profile, called sequence-order frequency matrix (SOFM), to extract the sequence-order information of neighboring residues from multiple sequence alignment (MSA).
Combined with two profile feature extraction approaches, top-n-grams and the Smith-Waterman algorithm, the SOFMs are applied to protein remote homology detection and fold recognition, and two predictors called SOFM-Top and SOFM-SW are proposed. Experimental results show that SOFM contains more information content than other profiles, and these two predictors outperform other state-of-the-art methods. It is anticipated that SOFM will become a very useful profile in the studies of protein structures and functions.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Chen:2019:QNS, author = "Minghan Chen and Brandon D. Amos and Layne T. Watson and John J. Tyson and Young Cao and Clifford A. Shaffer and Michael W. Trosset and Cihan Oguz and Gisella Kakoti", title = "Quasi-{Newton} Stochastic Optimization Algorithm for Parameter Estimation of a Stochastic Model of the Budding Yeast Cell Cycle", journal = j-TCBB, volume = "16", number = "1", pages = "301--311", month = jan, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2773083", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 11 18:45:00 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Parameter estimation in discrete or continuous deterministic cell cycle models is challenging for several reasons, including the nature of what can be observed, and the accuracy and quantity of those observations. The challenge is even greater for stochastic models, where the number of simulations and amount of empirical data must be even larger to obtain statistically valid parameter estimates. 
The two main contributions of this work are (1) stochastic model parameter estimation based on directly matching multivariate probability distributions, and (2) a new quasi-Newton algorithm class (QNSTOP) for stochastic optimization problems. QNSTOP directly uses the random objective function value samples rather than creating ensemble statistics. QNSTOP is used here to directly match empirical and simulated joint probability distributions rather than matching summary statistics. Results are given for a current state-of-the-art stochastic cell cycle model of budding yeast, whose predictions match well some summary statistics and one-dimensional distributions from empirical data, but do not match well the empirical joint distributions. The nature of the mismatch provides insight into the weakness in the stochastic model.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Liu:2019:SPL, author = "Cheng Liu and Hau San Wong", title = "Structured Penalized Logistic Regression for Gene Selection in Gene Expression Data Analysis", journal = j-TCBB, volume = "16", number = "1", pages = "312--321", month = jan, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2767589", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 11 18:45:00 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In gene expression data analysis, the problems of cancer classification and gene selection are closely related. Successfully selecting informative genes will significantly improve the classification performance. To identify informative genes from a large number of candidate genes, various methods have been proposed.
However, the gene expression data may include some important correlation structures, and some of the genes can be divided into different groups based on their biological pathways. Many existing methods do not take into consideration the exact correlation structure within the data. Therefore, from both the knowledge discovery and biological perspectives, an ideal gene selection method should take this structural information into account. Moreover, the better generalization performance can be obtained by discovering correlation structure within data. In order to discover structure information among data and improve learning performance, we propose a structured penalized logistic regression model which simultaneously performs feature selection and model learning for gene expression data analysis. An efficient coordinate descent algorithm has been developed to optimize the model. The numerical simulation studies demonstrate that our method is able to select the highly correlated features. In addition, the results from real gene expression datasets show that the proposed method performs competitively with respect to previous approaches.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Lopez-Lopera:2019:SLF, author = "Andres F. Lopez-Lopera and Mauricio A. 
Alvarez", title = "Switched Latent Force Models for Reverse-Engineering Transcriptional Regulation in Gene Expression Data", journal = j-TCBB, volume = "16", number = "1", pages = "322--335", month = jan, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2764908", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 11 18:45:00 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "To survive environmental conditions, cells transcribe their response activities into encoded mRNA sequences in order to produce certain amounts of protein concentrations. The external conditions are mapped into the cell through the activation of special proteins called transcription factors (TFs). Due to the difficult task to measure experimentally TF behaviors, and the challenges to capture their quick-time dynamics, different types of models based on differential equations have been proposed. However, those approaches usually incur in costly procedures, and they present problems to describe sudden changes in TF regulators. In this paper, we present a switched dynamical latent force model for reverse-engineering transcriptional regulation in gene expression data which allows the exact inference over latent TF activities driving some observed gene expressions through a linear differential equation. To deal with discontinuities in the dynamics, we introduce an approach that switches between different TF activities and different dynamical systems. This creates a versatile representation of transcription networks that can capture discrete changes and non-linearities. We evaluate our model on both simulated data and real data (e.g., microaerobic shift in E.
coli, yeast respiration), concluding that our framework allows for the fitting of the expression data while being able to infer continuous-time TF profiles.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhang:2019:TGP, author = "Jiao Zhang and Sam Kwong and Ka-Chun Wong", title = "{ToBio}: Global Pathway Similarity Search Based on Topological and Biological Features", journal = j-TCBB, volume = "16", number = "1", pages = "336--349", month = jan, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2769642", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Mon Mar 11 18:45:00 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Pathway similarity search plays a vital role in the post-genomics era. Unfortunately, pathway similarity search involves the graph isomorphism problem which is NP-complete. Therefore, efficient search algorithms are desirable. In this work, we propose a novel global pathway similarity search approach named ToBio, which considers both topological and biological features for effective global pathway similarity search. Specifically, as motivated from nature, various topological and biological features including subgraph signature similarities, sequence similarities, and gene ontology similarities are considered in ToBio. Since different features carry different functional importance and dependences, we report three schemes of ToBio using different sets of features. In addition, to enhance the existing search algorithms for rigorous comparisons, post-processing pipelines are also proposed to investigate how different features can contribute to the search performance. ToBio and other state-of-the-art methods are benchmarked on the gold-standard pathway datasets from three species.
The results demonstrate the competitive edges of ToBio over the state-of-the-arts ranging from the topological aspects to the biological aspects. Case studies have been conducted to reveal mechanistic insights into the unique search performance of ToBio.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhou:2019:E, author = "Shuigeng Zhou and Yi-Ping Phoebe Chen and Hiroshi Mamitsuka", title = "Editorial", journal = j-TCBB, volume = "16", number = "2", pages = "350--351", month = mar, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2827138", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:45 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2827138", abstract = "This special section consists of eight papers selected from the accepted papers of the 27th International Conference on Genome Informatics (GIW2016), which was held in Shanghai, China, October 3-5, 2016. These papers cover diverse topics, including gene \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Chen:2019:SWC, author = "Xiaojun Chen and Joshua Z. 
Huang and Qingyao Wu and Min Yang", title = "Subspace Weighting Co-Clustering of Gene Expression Data", journal = j-TCBB, volume = "16", number = "2", pages = "352--364", month = mar, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2705686", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:45 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2017.2705686", abstract = "Microarray technology enables the collection of vast amounts of gene expression data from biological experiments. Clustering algorithms have been successfully applied to exploring the gene expression data. Since a set of genes may be only correlated to a \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Tian:2019:REA, author = "Bo Tian and Qiong Duan and Can Zhao and Ben Teng and Zengyou He", title = "{Reinforce}: an Ensemble Approach for Inferring {PPI} Network from {AP--MS} Data", journal = j-TCBB, volume = "16", number = "2", pages = "365--376", month = mar, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2705060", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:45 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2017.2705060", abstract = "Affinity Purification-Mass Spectrometry (AP-MS) is one of the most important technologies for constructing protein-protein interaction (PPI) networks. In this paper, we propose an ensemble method, Reinforce, for inferring PPI network from AP-MS data set. 
\ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Xu:2019:EPD, author = "Bin Xu and Jihong Guan and Yang Wang and Zewei Wang", title = "Essential Protein Detection by Random Walk on Weighted Protein--Protein Interaction Networks", journal = j-TCBB, volume = "16", number = "2", pages = "377--387", month = mar, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2701824", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:45 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2017.2701824", abstract = "Essential proteins are critical to the development and survival of cells. Identification of essential proteins is helpful for understanding the minimal set of required genes in a living cell and for designing new drugs. To detect essential proteins, \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Sun:2019:IAL, author = "Weiping Sun and Yi Liu and Gills A. Lajoie and Bin Ma and Kaizhong Zhang", title = "An Improved Approach for {$N$}-Linked Glycan Structure Identification from {HCD MS\slash MS} Spectra", journal = j-TCBB, volume = "16", number = "2", pages = "388--395", month = mar, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2701819", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:45 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2017.2701819", abstract = "Glycosylation is a frequently observed post-translational modification on proteins. 
Currently, tandem mass spectrometry (MS/MS) serves as an efficient analytical technique for characterizing structures of oligosaccharides. However, developing effective \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2019:IMH, author = "Jingpu Zhang and Zuping Zhang and Zhigang Chen and Lei Deng", title = "Integrating Multiple Heterogeneous Networks for Novel {LncRNA}-Disease Association Inference", journal = j-TCBB, volume = "16", number = "2", pages = "396--406", month = mar, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2701379", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:45 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2017.2701379", abstract = "Accumulating experimental evidence has indicated that long non-coding RNAs (lncRNAs) are critical for the regulation of cellular biological processes implicated in many human diseases. 
However, only relatively few experimentally supported lncRNA-disease \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2019:KLS, author = "Zuping Zhang and Jingpu Zhang and Chao Fan and Yongjun Tang and Lei Deng", title = "{KATZLGO}: Large-Scale Prediction of {LncRNA} Functions by Using the {KATZ} Measure Based on Multiple Networks", journal = j-TCBB, volume = "16", number = "2", pages = "407--416", month = mar, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2704587", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:45 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2017.2704587", abstract = "Aggregating evidences have shown that long non-coding RNAs (lncRNAs) generally play key roles in cellular biological processes such as epigenetic regulation, gene expression regulation at transcriptional and post-transcriptional levels, cell \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Li:2019:MSM, author = "Min Li and Ruiqing Zheng and Yaohang Li and Fang-Xiang Wu and Jianxin Wang", title = "{MGT--SM}: a Method for Constructing Cellular Signal Transduction Networks", journal = j-TCBB, volume = "16", number = "2", pages = "417--424", month = mar, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2705143", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:45 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2017.2705143", abstract = "A cellular signal transduction network is an important means to describe biological responses to 
environmental stimuli and exchange of biological signals. Constructing the cellular signal transduction network provides an important basis for the study of \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Peng:2019:CMC, author = "Shaoliang Peng and Yingbo Cui and Shunyun Yang and Wenhe Su and Xiaoyu Zhang and Tenglilang Zhang and Weiguo Liu and Xing-Ming Zhao", title = "A {CPU--MIC} Collaborated Parallel Framework for {GROMACS} on {Tianhe-2} Supercomputer", journal = j-TCBB, volume = "16", number = "2", pages = "425--433", month = mar, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2713362", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:45 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/super.bib; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2017.2713362", abstract = "Molecular Dynamics (MD) is the simulation of the dynamic behavior of atoms and molecules. As the most popular software for molecular dynamics, GROMACS cannot work on large-scale data because of limit computing resources. 
In this paper, we propose a CPU \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2019:FSS, author = "Jie Zhang and Zhigen Zhao and Kai Zhang and Zhi Wei", title = "A Feature Sampling Strategy for Analysis of High Dimensional Genomic Data", journal = j-TCBB, volume = "16", number = "2", pages = "434--441", month = mar, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2779492", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:45 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2017.2779492", abstract = "With the development of high throughput technology, it has become feasible and common to profile tens of thousands of gene activities simultaneously. These genomic data typically have sample size of hundreds or fewer, which is much less than the feature \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Liu:2019:UMJ, author = "Kefei Liu and Jieping Ye and Yang Yang and Li Shen and Hui Jiang", title = "A Unified Model for Joint Normalization and Differential Gene Expression Detection in {RNA}-Seq Data", journal = j-TCBB, volume = "16", number = "2", pages = "442--454", month = mar, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2790918", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:45 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2790918", abstract = "The RNA-sequencing (RNA-seq) is becoming increasingly popular for quantifying gene expression levels. 
Since the RNA-seq measurements are relative in nature, between-sample normalization is an essential step in differential expression (DE) analysis. The \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2019:IFI, author = "Wei Zhang and Shu-Lin Wang", title = "An Integrated Framework for Identifying Mutated Driver Pathway and Cancer Progression", journal = j-TCBB, volume = "16", number = "2", pages = "455--464", month = mar, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2788016", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:45 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2017.2788016", abstract = "Next-generation sequencing (NGS) technologies provide amount of somatic mutation data in a large number of patients. 
The identification of mutated driver pathway and cancer progression from these data is a challenging task because of the heterogeneity of \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Michels:2019:RBM, author = "Tim Michels and Dimitri Berh and Xiaoyi Jiang", title = "An {RJMCMC}-Based Method for Tracking and Resolving Collisions of \bioname{Drosophila} Larvae", journal = j-TCBB, volume = "16", number = "2", pages = "465--474", month = mar, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2779141", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:45 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2017.2779141", abstract = "Drosophila melanogaster is an important model organism for ongoing research in neuro- and behavioral biology. 
Especially the locomotion analysis has become an integral part of such studies and thus elaborated automated tracking systems have been proposed \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2019:BNC, author = "Lu Zhang and Qiuping Pan and Yue Wang and Xintao Wu and Xinghua Shi", title = "{Bayesian} Network Construction and Genotype-Phenotype Inference Using {GWAS} Statistics", journal = j-TCBB, volume = "16", number = "2", pages = "475--489", month = mar, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2779498", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:45 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2017.2779498", abstract = "Genome-wide association studies (GWASs) have received increasing attention to understand how genetic variation affects different human traits. 
In this paper, we study whether and to what extent exploiting the GWAS statistics can be used for inferring \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Mandal:2019:BIC, author = "Koyel Mandal and Rosy Sarmah and Dhruba Kumar Bhattacharyya", title = "Biomarker Identification for Cancer Disease Using Biclustering Approach: an Empirical Study", journal = j-TCBB, volume = "16", number = "2", pages = "490--509", month = mar, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2820695", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:45 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2820695", abstract = "This paper presents an exhaustive empirical study to identify biomarkers using two approaches: frequency-based and network-based, over 17 different biclustering algorithms and six different cancer expression datasets. To systematically analyze the \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Andersen:2019:CTM, author = "Jakob L. Andersen and Christoph Flamm and Daniel Merkle and Peter F. 
Stadler", title = "Chemical Transformation Motifs --- Modelling Pathways as Integer Hyperflows", journal = j-TCBB, volume = "16", number = "2", pages = "510--523", month = mar, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2781724", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:45 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2017.2781724", abstract = "We present an elaborate framework for formally modelling pathways in chemical reaction networks on a mechanistic level. Networks are modelled mathematically as directed multi-hypergraphs, with vertices corresponding to molecules and hyperedges to \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Boluki:2019:CPB, author = "Shahin Boluki and Mohammad Shahrokh Esfahani and Xiaoning Qian and Edward R. Dougherty", title = "Constructing Pathway-Based Priors within a {Gaussian} Mixture Model for {Bayesian} Regression and Classification", journal = j-TCBB, volume = "16", number = "2", pages = "524--537", month = mar, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2778715", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:45 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2017.2778715", abstract = "Gene-expression-based classification and regression are major concerns in translational genomics. If the feature-label distribution is known, then an optimal classifier can be derived. 
If the predictor-target distribution is known, then an optimal \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2019:DPD, author = "Huanan Zhang and David Roe and Rui Kuang", title = "Detecting Population-Differentiation Copy Number Variants in Human Population Tree by Sparse Group Selection", journal = j-TCBB, volume = "16", number = "2", pages = "538--549", month = mar, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2779481", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:45 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2017.2779481", abstract = "Copy-number variants (CNVs) account for a substantial proportion of human genetic variations. Understanding the CNV diversities across populations is a computational challenge because CNV patterns are often present in several related populations and only \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Caudai:2019:ESC, author = "Claudia Caudai and Emanuele Salerno and Monica Zopp{\`e} and Anna Tonazzini", title = "Estimation of the Spatial Chromatin Structure Based on a Multiresolution Bead-Chain Model", journal = j-TCBB, volume = "16", number = "2", pages = "550--559", month = mar, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2791439", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:45 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2791439", abstract = "We present a method to infer 3D chromatin configurations from Chromosome Conformation Capture data. 
Quite a few methods have been proposed to estimate the structure of the nuclear DNA in homogeneous populations of cells from this kind of data. Many of \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Nizami:2019:EAS, author = "Bilal Nizami and Elham Mousavinezhad Sarasia and Mehbub I. K. Momin and Bahareh Honarparvar", title = "Estrogenic Active Stilbene Derivatives as Anti-Cancer Agents: a {DFT} and {QSAR} Study", journal = j-TCBB, volume = "16", number = "2", pages = "560--568", month = mar, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2779505", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:45 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2017.2779505", abstract = "Exploring different quantum chemical quantities for lead compounds is an ongoing approach in identifying crucial structural activity related features that are contributing into their biological activities. 
Herein, activity-related quantum chemical \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wang:2019:FAC, author = "Biing-Feng Wang and Chih-Yu Li", title = "Fast Algorithms for Computing Path-Difference Distances", journal = j-TCBB, volume = "16", number = "2", pages = "569--582", month = mar, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2790957", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:45 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2790957", abstract = "Tree comparison metrics are an important tool for the study of phylogenetic trees. Path-difference distances measure the dissimilarity between two phylogenetic trees (on the same set of taxa) by comparing their path-length vectors. Various norms can be \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Foo:2019:MCG, author = "Mathias Foo and Jongrae Kim and Declan G. Bates", title = "Modelling and Control of Gene Regulatory Networks for Perturbation Mitigation", journal = j-TCBB, volume = "16", number = "2", pages = "583--595", month = mar, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2771775", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:45 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2017.2771775", abstract = "Synthetic Biologists are increasingly interested in the idea of using synthetic feedback control circuits for the mitigation of perturbations to gene regulatory networks that may arise due to disease and/or environmental disturbances. 
Models employing \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Nguyen:2019:NDL, author = "Son P. Nguyen and Zhaoyu Li and Dong Xu and Yi Shang", title = "New Deep Learning Methods for Protein Loop Modeling", journal = j-TCBB, volume = "16", number = "2", pages = "596--606", month = mar, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2784434", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:45 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2017.2784434", abstract = "Computational protein structure prediction is a long-standing challenge in bioinformatics. In the process of predicting protein 3D structures, it is common that parts of an experimental structure are missing or parts of a predicted structure need to be \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Niu:2019:PPI, author = "Yun Niu and Hongmei Wu and Yuwei Wang", title = "Protein-Protein Interaction Identification Using a Similarity-Constrained Graph Model", journal = j-TCBB, volume = "16", number = "2", pages = "607--616", month = mar, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2777448", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:45 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2017.2777448", abstract = "Protein-protein interaction (PPI) identification is an important task in text mining. Most PPI detection systems make predictions solely based on evidence within a single sentence and often suffer from the heavy burden of manual annotation. 
This paper \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Salmela:2019:SFG, author = "Leena Salmela and Alexandru I. Tomescu", title = "Safely Filling Gaps with Partial Solutions Common to All Solutions", journal = j-TCBB, volume = "16", number = "2", pages = "617--626", month = mar, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2785831", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:45 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2017.2785831", abstract = "Gap filling has emerged as a natural sub-problem of many de novo genome assembly projects. The gap filling problem generally asks for an $s$-$t$ path in an assembly graph whose length matches the gap length estimate. Several methods have addressed it, \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Lopez-Caamal:2019:SAI, author = "Fernando L{\'o}pez-Caamal and Heinrich J. Huber", title = "Stable {IL}-$ 1 \beta $-Activation in an Inflammasome Signalling Model Depends on Positive and Negative Feedbacks and Tight Regulation of Protein Production", journal = j-TCBB, volume = "16", number = "2", pages = "627--637", month = mar, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2794971", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:45 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2794971", abstract = "Introduction. 
NLRP3-dependent inflammasome signalling is a key pathway during inflammatory processes and its deregulation is implicated in several diseases. NLRP3-inflammasome pathway activation leads to the rapid, phosphorylation-driven NF$ \kappa $ \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Najafi:2019:SAM, author = "Amir Najafi and Sepehr Janghorbani and Seyed Abolfazl Motahari and Emad Fatemizadeh", title = "Statistical Association Mapping of Population-Structured Genetic Data", journal = j-TCBB, volume = "16", number = "2", pages = "638--649", month = mar, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2786239", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:45 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2017.2786239", abstract = "Association mapping of genetic diseases has attracted extensive research interest during the recent years. However, most of the methodologies introduced so far suffer from spurious inference of the associated sites due to population inhomogeneities. 
In \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Lin:2019:RCM, author = "Xiaohui Lin and Xin Huang and Lina Zhou and Weijie Ren and Jun Zeng and Weihong Yao and Xingyuan Wang", title = "The Robust Classification Model Based on Combinatorial Features", journal = j-TCBB, volume = "16", number = "2", pages = "650--657", month = mar, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2779512", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:45 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2017.2779512", abstract = "Analyzing the disease data from the view of combinatorial features may better characterize the disease phenotype. In this study, a novel method is proposed to construct feature combinations and a classification model (CFC-CM) by mining key feature \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Shao:2019:THM, author = "Mingfu Shao and Carl Kingsford", title = "Theory and A Heuristic for the Minimum Path Flow Decomposition Problem", journal = j-TCBB, volume = "16", number = "2", pages = "658--670", month = mar, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2779509", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:45 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2017.2779509", abstract = "Motivated by multiple genome assembly problems and other applications, we study the following minimum path flow decomposition problem: Given a directed acyclic graph $ G = (V, E) $ with source $s$ and sink $t$ 
and a flow $f$, compute a set of $s$-$t$ \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Kaiser:2019:UDG, author = "Florian Kaiser and Dirk Labudde", title = "Unsupervised Discovery of Geometrically Common Structural Motifs and Long-Range Contacts in Protein {$3$D} Structures", journal = j-TCBB, volume = "16", number = "2", pages = "671--680", month = mar, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2786250", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:45 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2017.2786250", abstract = "The essential role of small evolutionarily conserved structural units in proteins has been extensively researched and validated. A popular example are serine proteases, where the peptide cleavage reaction is realized by a configuration of only three \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Martin-Vide:2019:ACB, author = "Carlos Martin-Vide and Miguel A. 
Vega-Rodriguez", title = "Algorithms for Computational Biology: Third Edition", journal = j-TCBB, volume = "16", number = "3", pages = "701--702", month = may, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2911264", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 23 11:22:19 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The papers in this special section were presented at the 3rd International Conference on Algorithms for Computational Biology, AlCoB 2016, that was held in Trujillo, Spain, on June 21--22, 2016.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ahmed:2019:GMS, author = "Syed Ali Ahmed and Saad Mneimneh", title = "{Gibbs\slash MCMC} Sampling for Multiple {RNA} Interaction with Sub-Optimal Solutions", journal = j-TCBB, volume = "16", number = "3", pages = "703--712", month = may, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2890519", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 23 11:22:19 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Multiple RNA interaction can be modeled as a problem in combinatorial optimization, where the ``optimal'' structure is driven by an energy-minimization-like algorithm. However, the actual structure may not be optimal in this computational sense. Moreover, it is not necessarily unique. Therefore, alternative sub-optimal solutions are needed to cover the biological ground. 
We present a combinatorial formulation for the Multiple RNA Interaction problem with approximation algorithms to handle various interaction patterns, which when combined with Gibbs sampling and Markov Chain Monte Carlo (MCMC), can efficiently generate a reasonable number of optimal and sub-optimal solutions. When viable structures are far from an optimal solution, exploring dependence among different parts of the interaction can increase their score and boost their candidacy for the sampling algorithm. By clustering the solutions, we identify a few representatives that are distinct enough to suggest possible alternative structures.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Mykowiecka:2019:CEE, author = "Agnieszka Mykowiecka and Pawel Gorecki", title = "Credibility of Evolutionary Events in Gene Trees", journal = j-TCBB, volume = "16", number = "3", pages = "713--726", month = may, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2788888", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 23 11:22:19 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Based on the classical non-parametric bootstrapping for phylogenetic trees, we propose a novel bootstrap method to define support for gene duplication and speciation events. By comparing bootstrap gene trees to the original gene tree, we calculate support for evolutionary events. While this approach can be used to annotate orthology and paralogy, we show how it can be used to verify the reliability of tree reconciliation. We propose a linear time algorithm for the computation of bootstrap values, and we show the correspondence of our method with the classical non-parametric bootstrapping. Finally, we present two computational experiments. 
In the first one, based on simulated data and nine yeast genomes, we show a comparative study of several tree rooting methods and evaluation of their performance by using our bootstrapping method. In the second experiment, using data from the TreeFam database, we tested how the reliability of the gene trees influence the inferred supertree. We found out that species trees inferred from gene trees having highly supported events are more biologically consistent.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Sankoff:2019:MSD, author = "David Sankoff and Chunfang Zheng and Yue Zhang and Joao Meidanis and Eric Lyons and Haibao Tang", title = "Models for Similarity Distributions of Syntenic Homologs and Applications to Phylogenomics", journal = j-TCBB, volume = "16", number = "3", pages = "727--737", month = may, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2849377", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 23 11:22:19 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We outline an integrated approach to speciation and whole genome doubling (WGD) to resolve the occurrence of these events in phylogenetic analysis. We propose a more principled way of estimating the parameters of gene divergence and fractionation than the standard mixture of normals analysis. We formulate an algorithm for resolving data on local peaks in the distributions of duplicate gene similarities for a number of related genomes. 
We illustrate with a comprehensive analysis of WGD-origin duplicate gene data from the family \bioname{Brassicaceae}.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Urbini:2019:ERP, author = "Laura Urbini and Blerina Sinaimeri and Catherine Matias and Marie-France Sagot", title = "Exploring the Robustness of the Parsimonious Reconciliation Method in Host-Symbiont Cophylogeny", journal = j-TCBB, volume = "16", number = "3", pages = "738--748", month = may, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2838667", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 23 11:22:19 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The aim of this paper is to explore the robustness of the parsimonious host-symbiont tree reconciliation method under editing or small perturbations of the input. The editing involves making different choices of unique symbiont mapping to a host in the case where multiple associations exist. This is made necessary by the fact that the tree reconciliation model is currently unable to handle such associations. The analysis performed could however also address the problem of errors. The perturbations are re-rootings of the symbiont tree to deal with a possibly wrong placement of the root specially in the case of fast-evolving species. 
In order to do this robustness analysis, we introduce a simulation scheme specifically designed for the host-symbiont cophylogeny context, as well as a measure to compare sets of tree reconciliations, both of which are of interest by themselves.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Huang:2019:GES, author = "De-Shuang Huang and Vitoantonio Bevilacqua and M. Michael Gromiha", title = "Guest Editorial for Special Section on the {13th International Conference on Intelligent Computing (ICIC)}", journal = j-TCBB, volume = "16", number = "3", pages = "749--750", month = may, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2902324", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 23 11:22:19 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The papers presented in this special section were presented at the Thirteenth International Conference on Intelligent Computing (ICIC) that was held in Liverpool, UK, on August 7--10, 2017. ICIC was formed to provide an annual forum dedicated to the emerging and challenging topics in artificial intelligence, machine learning, bioinformatics, and computational biology, etc. 
It aims to bring together researchers and practitioners from both academia and industry to share ideas, problems, and solutions related to the multifaceted aspects of intelligent computing.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wassan:2019:CSP, author = "Jyotsna Talreja Wassan and Haiying Wang and Fiona Browne and Huiru Zheng", title = "A Comprehensive Study on Predicting Functional Role of Metagenomes Using Machine Learning Methods", journal = j-TCBB, volume = "16", number = "3", pages = "751--763", month = may, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2858808", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 23 11:22:19 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "``Metagenomics'' is the study of genomic sequences obtained directly from environmental microbial communities with the aim to linking their structures with functional roles. The field has been aided in the unprecedented advancement through high-throughput omics data sequencing. The outcome of sequencing are biologically rich data sets. Metagenomic data consisting of microbial species which outnumber microbial samples, lead to the ``curse of dimensionality'' in datasets. Hence, the focus in metagenomics studies has moved towards developing efficient computational models using Machine Learning (ML), reducing the computational cost. In this paper, we comprehensively assessed various ML approaches to classifying high-dimensional human microbiota effectively into their functional phenotypes. We propose the application of embedded feature selection methods, namely, Extreme Gradient Boosting and Penalized Logistic Regression to determine important microbial species. 
The resultant feature set enhanced the performance of one of the most popular state-of-the-art methods, Random Forest RF over metagenomic studies. Experimental results indicate that the proposed method achieved best results in terms of accuracy, area under the Receiver Operating Characteristic curve ROC-AUC, and major improvement in processing time. It outperformed other feature selection methods of filters or wrappers over RF and classifiers such as Support Vector Machine SVM, Extreme Learning Machine ELM, and $k$-Nearest Neighbors $k$-NN.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Li:2019:NSA, author = "Min Li and Li Tang and Zhongxiang Liao and Junwei Luo and Fang-Xiang Wu and Yi Pan and Jianxin Wang", title = "A Novel Scaffolding Algorithm Based on Contig Error Correction and Path Extension", journal = j-TCBB, volume = "16", number = "3", pages = "764--773", month = may, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2858267", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 23 11:22:19 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The sequence assembly process can be divided into three stages: contigs extension, scaffolding, and gap filling. The scaffolding method is an essential step during the process to infer the direction and sequence relationships between the contigs. However, scaffolding still faces the challenges of uneven sequencing depth, genome repetitive regions, and sequencing errors, which often leads to many false relationships between contigs. The performance of scaffolding can be improved by removing potential false conjunctions between contigs. In this study, a novel scaffolding algorithm which is on the basis of path extension Loose-Strict-Loose strategy and contig error correction, called iLSLS. 
iLSLS helps reduce the false relationships between contigs, and improve the accuracy of subsequent steps. iLSLS utilizes a scoring function, which estimates the correctness of candidate paths by the distribution of paired reads, and try to conduction the extension with the path which is scored the highest. What's more, iLSLS can precisely estimate the gap size. We conduct experiments on two real datasets, and the results show that LSLS strategy is efficient to increase the correctness of scaffolds, and iLSLS performs better than other scaffolding methods.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhang:2019:EPH, author = "Xiaolong Zhang and Xiaoli Lin and Jiafu Zhao and Qianqian Huang and Xin Xu", title = "Efficiently Predicting Hot Spots in {PPIs} by Combining Random Forest and Synthetic Minority Over-Sampling Technique", journal = j-TCBB, volume = "16", number = "3", pages = "774--781", month = may, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2871674", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 23 11:22:19 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Hot spot residues bring into play the vital function in bioinformatics to find new medications such as drug design. However, current datasets are predominately composed of non-hot spots with merely a tiny percentage of hot spots. Conventional hot spots prediction methods may face great challenges towards the problem of imbalance training samples. This paper presents a classification method combining with random forest classification and oversampling strategy to improve the training performance. A strategy with an oversampling ability is used to generate hot spots data to balance the given training set. 
Random forest classification is then invoked to generate a set of forest trees for this oversampled training set. The final prediction performance can be computed recursively after the oversampling and training process. This proposed method is capable of randomly selecting features and constructing a robust random forest to avoid overfitting the training set. Experimental results from three data sets indicate that the performance of hot spots prediction has been significantly improved compared with existing classification methods.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Yuan:2019:IMO, author = "Lin Yuan and Le-Hang Guo and Chang-An Yuan and Youhua Zhang and Kyungsook Han and Asoke K. Nandi and Barry Honig and De-Shuang Huang", title = "Integration of Multi-Omics Data for Gene Regulatory Network Inference and Application to Breast Cancer", journal = j-TCBB, volume = "16", number = "3", pages = "782--791", month = may, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2866836", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 23 11:22:19 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Underlying a cancer phenotype is a specific gene regulatory network that represents the complex regulatory relationships between genes. It remains, however, a challenge to find cancer-related gene regulatory network because of insufficient sample sizes and complex regulatory mechanisms in which gene is influenced by not only other genes but also other biological factors. With the development of high-throughput technologies and the unprecedented wealth of multi-omics data it gives us a new opportunity to design machine learning method to investigate underlying gene regulatory network. 
In this paper, we propose an approach, which use Biweight Midcorrelation to measure the correlation between factors and make use of Nonconvex Penalty based sparse regression for Gene Regulatory Network inference (BMNPGRN). BMNPGRN incorporates multi-omics data including DNA methylation and copy number variation and their interactions in gene regulatory network model. The experimental results on synthetic datasets show that BMNPGRN outperforms popular and state-of-the-art methods including DCGRN, ARACNE, and CLR under false positive control. Furthermore, we applied BMNPGRN on breast cancer (BRCA) data from The Cancer Genome Atlas database and provided gene regulatory network.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Deng:2019:IKG, author = "Su-Ping Deng and Wei-Li Guo", title = "Identifying Key Genes of Liver Cancer by Networking of Multiple Data Sets", journal = j-TCBB, volume = "16", number = "3", pages = "792--800", month = may, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2874238", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 23 11:22:19 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Liver cancer is one of the deadliest cancers in the world. To find effective therapies for this cancer, it is indispensable to identify key genes, which may play critical roles in the incidence of the liver cancer. To identify key genes of the liver cancer with high accuracy, we integrated multiple microarray gene expression data sets to compute common differentially expressed genes, which will result more accurate than those from individual data set.
To find the main functions or pathways that these genes are involved in, some enrichment analyses were performed including functional enrichment analysis, pathway enrichment analysis, and disease association study. Based on these genes, a protein-protein interaction network was constructed and analyzed to identify key genes of the liver cancer by combining the local and global influence of nodes in the network. The identified key genes, such as TOP2A, ESR1, and KMO, have been demonstrated to be key biomarkers of the liver cancer in many publications. All the results suggest that our method can effectively identify key genes of the liver cancer. Moreover, our method can be applied to other types of data sets to select key genes of other complex diseases.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wu:2019:CDM, author = "Peng Wu and Dong Wang", title = "Classification of a {DNA} Microarray for Diagnosing Cancer Using a Complex Network Based Method", journal = j-TCBB, volume = "16", number = "3", pages = "801--808", month = may, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2868341", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 23 11:22:19 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Applications that classify DNA microarray expression data are helpful for diagnosing cancer. Many attempts have been made to analyze these data; however, new methods are needed to obtain better results. In this study, a Complex Network CN classifier was exploited to implement the classification task. An algorithm was used to initialize the structure, which allowed input variables to be selected over layered connections and different activation functions for different nodes. 
Then, a hybrid method integrated the Genetic Programming and the Particle Swarm Optimization algorithms was used to identify an optimal structure with the parameters encoded in the classifier. The single CN classifier and an ensemble of CN classifiers were tested on four bench data sets. To ensure diversity of the ensemble classifiers, we constructed a base classifier using different feature sets, i.e., Pearson's correlation, Spearman's correlation, euclidean distance, Cosine coefficient, and the Fisher-ratio. The experimental results suggest that a single classifier can be used to obtain state-of-the-art results and the ensemble yielded better results.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{You:2019:EEL, author = "Zhu-Hong You and Wen-Zhun Huang and Shanwen Zhang and Yu-An Huang and Chang-Qing Yu and Li-Ping Li", title = "An Efficient Ensemble Learning Approach for Predicting Protein-Protein Interactions by Integrating Protein Primary Sequence and Evolutionary Information", journal = j-TCBB, volume = "16", number = "3", pages = "809--817", month = may, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2882423", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 23 11:22:19 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Protein-protein interactions PPIs perform a very important function in a number of cellular processes, including signal transduction, post-translational modifications, apoptosis, and cell growth. Deregulation of PPIs will lead to many diseases, including pernicious anemia or cancers. Although a large number of high-throughput techniques are designed to generate PPIs data, they are generally expensive, inefficient, and labor-intensive. 
Hence, there is an urgent need for developing a computational method to accurately and rapidly detect PPIs. In this article, we proposed a highly efficient method to detect PPIs by integrating a new protein sequence substitution matrix feature representation and ensemble weighted sparse representation model classifier. The proposed method is demonstrated on \bioname{Saccharomyces cerevisiae} dataset and achieved 99.26 percent prediction accuracy with 98.53 percent sensitivity at precision of 100 percent, which is shown to have much higher predictive accuracy than the state-of-the-art methods. Extensive contrast experiments are performed with the benchmark data set from Human and \bioname{Helicobacter pylori} that our proposed method can achieve outstanding better success rates than other existing approaches in this problem. Experiment results illustrate that our proposed method presents an economical approach for computational building of PPI networks, which can be a helpful supplementary method for future proteomics researches.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhong:2019:FEF, author = "Hua Zhong and Mingzhou Song", title = "A Fast Exact Functional Test for Directional Association and Cancer Biology Applications", journal = j-TCBB, volume = "16", number = "3", pages = "818--826", month = may, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2809743", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 23 11:22:19 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Directional association measured by functional dependency can answer important questions on relationships between variables, for example, in discovery of molecular interactions in biological systems.
However, when one has no prior information about the functional form of a directional association, there is not a widely established statistical procedure to detect such an association. To address this issue, here we introduce an exact functional test for directional association by examining the strength of functional dependency. It is effective in promoting functional patterns by reducing statistical power on dependent non-functional patterns. We designed an algorithm to carry out the test using a fast branch-and-bound strategy, which achieved a substantial speedup over brute-force enumeration. On data from an epidemiological study of liver cancer, the test identified the hepatitis status of a subject as the most influential risk factor among others for the cancer phenotype. On human lung cancer transcriptome data, the test selected 1068 transcription start sites of putative noncoding RNAs directionally associated with the presence or absence of lung cancer, stronger than 95 percent transcription start sites of 694 curated cancer genes. These predictions include non-monotonic interaction patterns, to which other routine tests were insensitive. Complementing symmetric non-directional association methods such as Fisher's exact test, the exact functional test is a unique exact statistical test for evaluating evidence for causal relationships.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Samaddar:2019:MDP, author = "Sandip Samaddar and Rituparna Sinha and Rajat K. 
De", title = "A Model for Distributed Processing and Analyses of {NGS} Data under Map-Reduce Paradigm", journal = j-TCBB, volume = "16", number = "3", pages = "827--840", month = may, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2816022", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 23 11:22:19 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Massively parallel sequencing technique, introduced by NGS technology, has resulted in an exponential growth of sequencing data, with greatly reduced cost and increased throughput. This huge explosion of data has introduced new challenges in regard to its storage, integration, processing, and analyses. In this paper, we have proposed a novel distributed model under Map-Reduce paradigm to address the NGS big data problem. The architecture of the model involves Map-Reduce based modularized approach involving three different phases that support various analytical pipelines. The first phase will generate detailed base level information of various individual genomes, by granulating the alignment data. The other two phases independently process this base level information in parallel. One of these two phases will provide an integrated DNA profile of multiple individuals, whereas the other phase will generate contigs with similar features in an individual. Each of these three phases will generate a repository of genomic information that will facilitate other analytical pipelines. A simulated and real experimental prototypes has been provided as results to show the effectiveness of the model and its superiority over a few existing popular models and tools. 
A detailed description of the scope of applications of this model is also included in this article.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Sun:2019:MDN, author = "Dongdong Sun and Minghui Wang and Ao Li", title = "A Multimodal Deep Neural Network for Human Breast Cancer Prognosis Prediction by Integrating Multi-Dimensional Data", journal = j-TCBB, volume = "16", number = "3", pages = "841--850", month = may, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2806438", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 23 11:22:19 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Breast cancer is a highly aggressive type of cancer with very low median survival. Accurate prognosis prediction of breast cancer can spare a significant number of patients from receiving unnecessary adjuvant systemic treatment and its related expensive medical costs. Previous work relies mostly on selected gene expression data to create a predictive model. The emergence of deep learning methods and multi-dimensional data offers opportunities for more comprehensive analysis of the molecular characteristics of breast cancer and therefore can improve diagnosis, treatment, and prevention. In this study, we propose a Multimodal Deep Neural Network by integrating Multi-dimensional Data MDNNMD for the prognosis prediction of breast cancer. The novelty of the method lies in the design of our method's architecture and the fusion of multi-dimensional data. The comprehensive performance evaluation results show that the proposed method achieves a better performance than the prediction methods with single-dimensional data and other existing approaches. 
The source code implemented by TensorFlow 1.0 deep learning library can be downloaded from the Github: https://github.com/USTC-HIlab/MDNNMD.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{VanDyck:2019:RSP, author = "Michiel {Van Dyck} and Xavier {Woot de Trixhe} and An Vermeulen and Wim Vanroose", title = "A Robust Simulator for Physiologically Structured Population Models", journal = j-TCBB, volume = "16", number = "3", pages = "851--864", month = may, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2810077", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 23 11:22:19 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "A framework to simulate physiologically structured population (PSP) models on high performance compute (HPC) infrastructure is built. Based on the model of a single cell, billions of cells can be simulated in an efficient way, allowing fast simulation of the interaction of an entire organ with other body parts. Through combination of three state-of-the-art algorithms, the simulation time is decreased with multiple orders of magnitude. First: PSP modelling exploits the fact that a lot of the cells behave identically at the same time which results in multiple orders of magnitude speed-up. The second speed-up is achieved by using an unconditionally stable, partial differential equation solver which allows big time-stepping by trading off speed with precision. The third speed-up is due to the fact that the framework is designed with HPC cluster use in mind. The PSP simulator is mathematically derived to have maximal stability.
Simulation results are validated and simulation speed and accuracy are measured.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Sharifi:2019:ARC, author = "Maryam Sharifi and Arta A. Jamshidi and Nazanin Namazi Sarvestani", title = "An Adaptive Robust Control Strategy in a Cancer Tumor-Immune System Under Uncertainties", journal = j-TCBB, volume = "16", number = "3", pages = "865--873", month = may, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2803175", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 23 11:22:19 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In this work, we propose an adaptive robust control for a second order nonlinear model of the interaction between cancer and immune cells of the body to control the growth of cancer and maintain the number of immune cells in an appropriate level. Up to now, most of the control approaches are based on minimizing the drug dosage based on an optimal control structure. However, in many cases, measuring the exact quantity of the model parameters is not possible. This is due to limitation in measuring devices, variational and undetermined characteristics of micro-environmental factors and the variable nature of parameters during the growth and treatment phases of cancer. It is of great importance to present a control strategy that can deal with these variables and unknown factors in a nonlinear model. Adaptive control is a suitable choice to achieve this goal. We assume linear uncertainties for the model parameters and employ a sliding term for updating the estimated parameters and the control signals. Moreover, due to difficulties in measuring the number of immune cells in biological experiments, an estimation technique is applied to infer this value. 
The convergence of the estimated number of immune cells to the actual value is shown. The stability and convergence of the number of cancer and immune cells to the specified target values are also proved using a time-varying Lyapunov function. Finally, we have shown the performance of the proposed control strategy in the context of various computational results.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Nanni:2019:BCH, author = "Loris Nanni and Sheryl Brahnam and Stefano Ghidoni and Alessandra Lumini", title = "Bioimage Classification with Handcrafted and Learned Features", journal = j-TCBB, volume = "16", number = "3", pages = "874--885", month = may, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2821127", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 23 11:22:19 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Bioimage classification is increasingly becoming more important in many biological studies including those that require accurate cell phenotype recognition, subcellular localization, and histopathological classification. In this paper, we present a new General Purpose GenP bioimage classification method that can be applied to a large range of classification problems. The GenP system we propose is an ensemble that combines multiple texture features both handcrafted and learned descriptors for superior and generalizable discriminative power. Our ensemble obtains a boosting of performance by combining local features, dense sampling features, and deep learning features. Each descriptor is used to train a different Support Vector Machine that is then combined by sum rule. 
We evaluate our method on a diverse set of bioimage classification tasks each represented by a benchmark database, including some of those available in the IICBU 2008 database. Each bioimage classification task represents a typical subcellular, cellular, and tissue level classification problem. Our evaluation on these datasets demonstrates that the proposed GenP bioimage ensemble obtains state-of-the-art performance without any ad-hoc dataset tuning of the parameters thereby avoiding any risk of overfitting/overtraining. To reproduce the experiments reported in this paper, the MATLAB code of all the descriptors is available at https://github.com/LorisNanni and https://www.dropbox.com/s/bguw035yrqz0pwp/ElencoCode.docx?dl=0.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Cheng:2019:BGA, author = "Haoyu Cheng and Yong Zhang and Yun Xu", title = "{BitMapper2}: a {GPU}-Accelerated All-Mapper Based on the Sparse $q$-Gram Index", journal = j-TCBB, volume = "16", number = "3", pages = "886--897", month = may, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2822687", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 23 11:22:19 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The explosive growth of next-generation sequencing NGS read datasets drives a need for new faster read mappers. One class of read mappers, called all-mappers, is designed to identify all mapping locations of each read. Many all-mappers have been developed over the past few years, but they are either time-consuming or memory-consuming. Here, we present BitMapper2, a GPU-accelerated read mapper that reports all mapping locations of NGS reads. 
To make full use of the parallel processing capability of GPUs, BitMapper2 proposes the sparse $q$-gram index, which reduces the memory requirement and the data transfer time between GPU and CPU. We also design the filtration part and the verification part of BitMapper2 specifically for the architecture of GPU. In addition, BitMapper2 is still time-efficient and memory-efficient even if there is no GPU available. Experiments show that BitMapper2 was significantly faster than the state-of-the-art all-mappers, while requiring less space.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Whidden:2019:CUS, author = "Chris Whidden and Frederick A. Matsen", title = "Calculating the Unrooted Subtree Prune-and-Regraft Distance", journal = j-TCBB, volume = "16", number = "3", pages = "898--911", month = may, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2802911", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 23 11:22:19 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The subtree prune-and-regraft (SPR) distance metric is a fundamental way of comparing evolutionary trees. It has wide-ranging applications, such as to study lateral genetic transfer, viral recombination, and Markov chain Monte Carlo phylogenetic inference. Although the rooted version of SPR distance can be computed relatively efficiently between rooted trees using fixed-parameter-tractable maximum agreement forest (MAF) algorithms, no MAF formulation is known for the unrooted case. Correspondingly, previous algorithms are unable to compute unrooted SPR distances larger than 7. In this paper, we substantially advance understanding of and computational algorithms for the unrooted SPR distance.
First, we identify four properties of optimal SPR paths, each of which suggests that no MAF formulation exists in the unrooted case. Then, we introduce the replug distance, a new lower bound on the unrooted SPR distance that is amenable to MAF methods, and give an efficient fixed-parameter algorithm for calculating it. Finally, we develop a ``progressive A*'' search algorithm using multiple heuristics, including the TBR and replug distances, to exactly compute the unrooted SPR distance. Our algorithm is nearly two orders of magnitude faster than previous methods on small trees, and allows computation of unrooted SPR distances as large as 14 on trees with 50 leaves.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wang:2019:CAG, author = "Pei Wang and Daojie Wang and Jinhu Lu", title = "Controllability Analysis of a Gene Network for \bioname{Arabidopsis thaliana} Reveals Characteristics of Functional Gene Families", journal = j-TCBB, volume = "16", number = "3", pages = "912--924", month = may, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2821145", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 23 11:22:19 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Based on structural controllability of complex networks and a constructed gene network with 9,241 nodes for \bioname{Arabidopsis thaliana}, we classified nodes into five categories via their roles in control or node deletion, including indispensable, neutral, dispensable, driver, and critical driver nodes. The indispensable nodes can increase the number of drivers after deletion, which are never drivers or critical drivers. About 10 percent of nodes are indispensable. However, more than 60 percent of nodes are neutral ones.
More than 62 percent of nodes are drivers, which indicates the gene network is very difficult to be fully controlled. Gene Ontology GO enrichment analysis reveals that different sets of nodes have preferred biological functions and processes. The indispensable nodes are significantly enriched as essential genes, drought responsive and abscisic acid ABA independent genes, transcriptional factors TFs, core cell cycle genes, and ABA and Gibberellin GA related genes. The critical drivers are enriched as receptor kinase-like genes, while shorted in WRKY TFs and functional genes that are enriched in the indispensable nodes. Robustness analysis based on node and edge additions, edge rewiring indicate the obtained conclusions are robust to network perturbations. Our investigations clarify control roles of some gene families and provide potential implications for identifying functional genes in other plant species, such as drought responsive genes and TFs.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Thiele:2019:DOE, author = "Sven Thiele and Sandra Heise and Wiebke Hessenkemper and Hannes Bongartz and Melissa Fensky and Fred Schaper and Steffen Klamt", title = "Designing Optimal Experiments to Discriminate Interaction Graph Models", journal = j-TCBB, volume = "16", number = "3", pages = "925--935", month = may, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2812184", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 23 11:22:19 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Modern methods for the inference of cellular networks from experimental data often express nondeterminism by proposing an ensemble of candidate models with similar properties. 
To further discriminate among these model candidates, new experiments need to be carried out. Theoretically, the number of possible experiments is exponential in the number of possible perturbations. In praxis, experiments are expensive and usually there exist several constraints limiting which experiments can be performed. Limiting factors may exist on the combinations of perturbations that are technically possible, which components can be measured, and limitations on the number of affordable experiments. Further, not all experiments are equally well suited to discriminate model candidates. Therefore, the goal of optimal experiment design is to determine those experiments that discriminate most of the candidates while minimizing the costs. We present an approach for experiment planning with interaction graph models and sign consistency methods. This new approach can be used in combination with methods for network inference and consistency checking. The proposed method determines experiments which are most suitable to deliver results that reduce the number of candidate models. We applied our method to study the Erythropoietin signal transduction in human kidney cells HEK293. We first used simulated experiment data from an ODE model to demonstrate in silico that our experimental design results in the inference of the gold standard model. 
Finally, we used the approach to plan in vivo experiments that enabled us to discriminate model candidates for the Erythropoietin signal transduction in this cell line.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhang:2019:DPC, author = "Yi-Yan Zhang and Qin Li and Yi Xin and Wei-Qi Lv", title = "Differentiating Prostate Cancer from Benign Prostatic Hyperplasia Using {PSAD} Based on Machine Learning: Single-Center Retrospective Study in {China}", journal = j-TCBB, volume = "16", number = "3", pages = "936--941", month = may, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2822675", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 23 11:22:19 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The incidence of prostate cancer increases annually. Prostate cancer is an underreported and emerging problem in China. We conducted a cross-sectional study of 392 eligible patients from 710 men with prostate cancer or benign prostatic hyperplasia between 2000 and 2003. For total prostate-specific antigen, age, three diameters of prostate, prostate volume and prostate-specific antigen density seven indices, analysis of variance, and t test were used to analyze the difference between the groups. A decision tree with pruning was established using the prostate-specific antigen density, age, and transversal diameter of the prostate to screen the patient with prostate cancer. According to the established decision tree model, prostate-specific antigen density was the most important factor affecting the occurrence of prostate cancer. In elderly people over the age of 83 years, the transverse diameter of prostate cancer was smaller than that of benign prostatic hyperplasia, with prostate-specific antigen density less than 0.49 ng/L2. 
No additional index was introduced, and the detection rate of prostate cancer was 86.6 percent. The specificity was enhanced to 78.1 percent.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Kaloudas:2019:EEB, author = "Dimitrios Kaloudas and Nikolet Pavlova and Robert Penchovsky", title = "{EBWS}: Essential Bioinformatics {Web} Services for Sequence Analyses", journal = j-TCBB, volume = "16", number = "3", pages = "942--953", month = may, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2816645", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 23 11:22:19 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The Essential Bioinformatics Web Services (EBWS) are implemented on a new PHP-based server that provides useful tools for analyses of DNA, RNA, and protein sequences applying a user-friendly interface. Nine Web-based applets are currently available on the Web server. They include reverse complementary DNA and random DNA/RNA/peptide oligomer generators, a pattern sequence searcher, a DNA restriction cutter, a prokaryotic ORF finder, and a random DNA/RNA mutation generator. It also includes calculators of melting temperature (TM) of DNA/DNA, RNA/RNA, and DNA/RNA hybrids, a guide RNA (gRNA) generator for the CRISPR/Cas9 system and an annealing temperature calculator for multiplex PCR. The pattern-searching applet has no limitations in the number of motif inputs and applies a toolbox of Regex quantifiers that can be used for defining complex sequence queries of RNA, DNA, and protein sequences. The DNA enzyme digestion program utilizes a large database of 1,502 restriction enzymes.
The gRNA generator has a database of 25 bacterial genomes searchable for gRNA target sequences and has an option for searching in any genome sequence given by the user. All programs are permanently available online at http://penchovsky.atwebpages.com/applications.php without any restrictions.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Xu:2019:LRE, author = "Bo Xu and Hongfei Lin and Yuan Lin", title = "Learning to Refine Expansion Terms for Biomedical Information Retrieval Using Semantic Resources", journal = j-TCBB, volume = "16", number = "3", pages = "954--966", month = may, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2801303", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 23 11:22:19 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "With the rapid development of biomedicine, the number of biomedical articles has increased accordingly, which presents a great challenge for biologists trying to keep up with the latest research. Information retrieval seeks to meet this challenge by searching among a large number of articles based on given queries and providing the most relevant ones to fulfill information needs. As an effective information retrieval technique, query expansion has some room for improvement to achieve the desired performance when directly applied for biomedical information retrieval because there exist many domain-related terms both in users' queries and in related articles. To solve this problem, we propose a biomedical query expansion framework based on learning-to-rank methods, in which we refine candidate expansion terms by training term-ranking models to select the most relevant terms. 
To train the term-ranking models, we first propose a pseudo-relevance feedback method based on MeSH to select candidate expansion terms and then represent the candidate terms as feature vectors by defining both the corpus-based term features and the resource-based term features. Experimental results obtained for TREC genomics datasets show that our method can capture more relevant terms to expand the original query and effectively improve biomedical information retrieval performance.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{He:2019:MBP, author = "Tiantian He and Keith C. C. Chan", title = "Measuring Boundedness for Protein Complex Identification in {PPI} Networks", journal = j-TCBB, volume = "16", number = "3", pages = "967--979", month = may, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2822709", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 23 11:22:19 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The problem of identifying protein complexes in Protein-Protein Interaction (PPI) networks is usually formulated as the problem of identifying dense regions in such networks. In this paper, we present a novel approach, called TBPCI, to identify protein complexes based instead on the concept of a measure of boundedness. Such a measure is defined as an objective function of a Jaccard Index-based connectedness measure which takes into consideration how much two proteins within a network are connected to each other, and an association measure which takes into consideration how much two connecting proteins are associated based on their attributes found in the Gene Ontology database.
Based on the above two measures, the objective function is derived to capture how strong the proteins can be considered as bounded together and the objective value is therefore referred as the aggregated degree of boundedness. To identify protein complexes, TBPCI computes the degree of boundedness between all possible pairwise proteins. Then, TBPCI uses a Breadth-First-Search method to determine whether a protein-pair should be incorporated into the same complex. TBPCI has been tested with several real data sets and the experimental results show it is an effective approach for identifying protein complexes in PPI networks.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wang:2019:MMV, author = "Aiguo Wang and Ye Chen and Ning An and Jing Yang and Lian Li and Lili Jiang", title = "Microarray Missing Value Imputation: a Regularized Local Learning Method", journal = j-TCBB, volume = "16", number = "3", pages = "980--993", month = may, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2810205", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 23 11:22:19 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Microarray experiments on gene expression inevitably generate missing values, which impedes further downstream biological analysis. Therefore, it is key to estimate the missing values accurately. Most of the existing imputation methods tend to suffer from the over-fitting problem. In this study, we propose two regularized local learning methods for microarray missing value imputation. 
Motivated by the grouping effect of $ L_2 $ regularization, after selecting the target gene, we train an $ L_2 $ Regularized Local Least Squares imputation model (RLLSimpute\_L2) on the target gene and its neighbors to estimate the missing values of the target gene. Furthermore, RLLSimpute\_L2 imputes the missing values in an ascending order based on the associated missing rate with each target gene. This contributes to fully utilizing the previously estimated values. Besides $ L_2 $, we further explore $ L_1 $ regularization and propose an $ L_1 $ Regularized Local Least Squares imputation model (RLLSimpute\_L1). To evaluate their effectiveness, we conducted extensive experimental studies on six benchmark datasets covering both time series and non-time series cases. Nine state-of-the-art imputation methods are compared with RLLSimpute\_L2 and RLLSimpute\_L1 in terms of three performance metrics. The comparative experimental results indicate that RLLSimpute\_L2 outperforms its competitors by achieving smaller imputation errors and better structure preservation of differentially expressed genes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Song:2019:PGA, author = "You Song and Siyu Yang and Jinzhi Lei", title = "{ParaCells}: a {GPU} Architecture for Cell-Centered Models in Computational Biology", journal = j-TCBB, volume = "16", number = "3", pages = "994--1006", month = may, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2814570", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 23 11:22:19 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In computational biology, the hierarchy of biological systems requires the development of flexible and powerful computational tools.
Graphics processing unit (GPU) architecture has been a suitable device for parallel computing in simulating multi-cellular systems. However, in modeling complex biological systems, scientists often face two tasks, mathematical formulation and skillful programming. In particular, specific programming skills are needed for GPU programming. Therefore, the development of an easy-to-use computational architecture, which utilizes GPU for parallel computing and provides intuitive interfaces for simple implementation, is needed so that general scientists can perform GPU simulations without knowing much about the GPU architecture. Here, we introduce ParaCells, a cell-centered GPU simulation architecture for NVIDIA compute unified device architecture (CUDA). ParaCells was designed as a versatile architecture that connects the user logic in C++ with NVIDIA CUDA runtime and is specific to the modeling of multi-cellular systems. An advantage of ParaCells is its object-oriented model declaration, which allows it to be widely applied to many biological systems through the combination of basic biological concepts. We test ParaCells with two applications. Both applications are significantly faster when compared with sequential as well as parallel OpenMP and OpenACC implementations. Moreover, the simulation programs based on ParaCells are cleaner and more readable than other versions.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Patoary:2019:PSD, author = "Mohammad Nazrul Ishlam Patoary and Carl Tropper and Robert A. McDougal and Zhongwei Lin and William W.
Lytton", title = "Parallel Stochastic Discrete Event Simulation of Calcium Dynamics in Neuron", journal = j-TCBB, volume = "16", number = "3", pages = "1007--1019", month = may, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2756930", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 23 11:22:19 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The intra-cellular calcium signaling pathways of a neuron depends on both biochemical reactions and diffusions. Some quasi-isolated compartments (e.g., spines) are so small and calcium concentrations are so low that one extra molecule diffusing in by chance can make a nontrivial difference in concentration percentage-wise. These rare events can affect dynamics discretely in such a way that they cannot be evaluated by a deterministic and continuous simulation. Stochastic models of such a system provide a more detailed understanding of these systems than existing deterministic models because they capture their behavior at a molecular level. Our research focuses on the development of a high performance parallel discrete event simulation environment, Neuron Time Warp (NTW), which is intended for use in the parallel simulation of stochastic reaction-diffusion systems such as intra-calcium signaling. NTW is integrated with NEURON, a simulator which is widely used within the neuroscience community. We simulate two models, a calcium buffer and a calcium wave model. The calcium buffer model is employed in order to verify the correctness and performance of NTW by comparing it to a sequential deterministic simulation in NEURON.
We also derived a discrete event calcium wave model from a deterministic model using the stochastic $ \text {IP}_3 \text {R} $ structure.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Fang:2019:PPB, author = "Chao Fang and Yi Shang and Dong Xu", title = "Prediction of Protein Backbone Torsion Angles Using Deep Residual Inception Neural Networks", journal = j-TCBB, volume = "16", number = "3", pages = "1020--1028", month = may, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2814586", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 23 11:22:19 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Prediction of protein backbone torsion angles Psi and Phi can provide important information for protein structure prediction and sequence alignment. Existing methods for Psi-Phi angle prediction have significant room for improvement. In this paper, a new deep residual inception network architecture, called DeepRIN, is proposed for the prediction of Psi-Phi angles. The input to DeepRIN is a feature matrix representing a composition of physico-chemical properties of amino acids, a 20-dimensional position-specific substitution matrix (PSSM) generated by PSI-BLAST, a 30-dimensional hidden Markov Model sequence profile generated by HHBlits, and predicted eight-state secondary structure features. DeepRIN is designed based on inception networks and residual networks that have performed well on image classification and text recognition. The architecture of DeepRIN enables effective encoding of local and global interactions between amino acids in a protein sequence to achieve accurate prediction. Extensive experimental results show that DeepRIN outperformed the best existing tools significantly.
Compared to the recently released state-of-the-art tool, SPIDER3, DeepRIN reduced the Psi angle prediction error by more than 5 degrees and the Phi angle prediction error by more than 2 degrees on average. The executable tool of DeepRIN is available for download at http://dslsrv8.cs.missouri.edu/~cf797/MUFoldAngle/.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Urban:2019:DLD, author = "Gregor Urban and Kevin Bache and Duc T. T. Phan and Agua Sobrino and Alexander K. Shmakov and Stephanie J. Hachey and Christopher C. W. Hughes and Pierre Baldi", title = "Deep Learning for Drug Discovery and Cancer Research: Automated Analysis of Vascularization Images", journal = j-TCBB, volume = "16", number = "3", pages = "1029--1035", month = may, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2841396", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 23 11:22:19 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Likely drug candidates which are identified in traditional pre-clinical drug screens often fail in patient trials, increasing the societal burden of drug discovery. A major contributing factor to this phenomenon is the failure of traditional in vitro models of drug response to accurately mimic many of the more complex properties of human biology. We have recently introduced a new microphysiological system for growing vascularized, perfused microtissues that more accurately models human physiology and is suitable for large drug screens. In this work, we develop a machine learning model that can quickly and accurately flag compounds which effectively disrupt vascular networks from images taken before and after drug application in vitro. 
The system is based on a convolutional neural network and achieves near perfect accuracy while committing potentially no expensive false negatives.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Singh:2019:EPG, author = "Kumar Saurabh Singh and Bartlomiej J. Troczka and Katherine Beadle and Linda M. Field and T. G. Emyr Davies and Martin S. Williamson and Ralf Nauen and Chris Bass", title = "Extension of Partial Gene Transcripts by Iterative Mapping of {RNA-Seq} Raw Reads", journal = j-TCBB, volume = "16", number = "3", pages = "1036--1041", month = may, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2865309", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 23 11:22:19 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Many non-model organisms lack reference genomes and the sequencing and de novo assembly of an organism's transcriptome is an affordable means by which to characterize the coding component of its genome. Despite the advances that have made this possible, assembling a transcriptome without a known reference usually results in a collection of full-length and partial gene transcripts. The downstream analysis of genes represented as partial transcripts then often requires further experimental work in the laboratory in order to obtain full-length sequences. We have explored whether partial transcripts, encoding genes of interest present in de novo assembled transcriptomes of a model and non-model insect species, could be further extended by iterative mapping against the raw transcriptome sequencing reads. Partial sequences encoding cytochrome P450s and carboxyl/cholinesterase were used in this analysis, because they are large multigene families and exhibit significant variation in expression.
We present an effective method to improve the contiguity of partial transcripts in silico that, in the absence of a reference genome, may be a quick and cost-effective alternative to their extension by laboratory experimentation. Our approach resulted in the successful extension of incompletely assembled transcripts, often to full length. We experimentally validated these results in silico and using real-time PCR and sequencing.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Tu:2019:IGN, author = "Jia-Juan Tu and Le Ou-Yang and Xiaohua Hu and Xiao-Fei Zhang", title = "Inferring Gene Network Rewiring by Combining Gene Expression and Gene Mutation Data", journal = j-TCBB, volume = "16", number = "3", pages = "1042--1048", month = may, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2834529", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 23 11:22:19 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Gene dependency networks often undergo changes with respect to different disease states. Understanding how these networks rewire between two different disease states is an important task in genomic research. Although many computational methods have been proposed to undertake this task via differential network analysis, most of them are designed for a predefined data type. With the development of the high throughput technologies, gene activity measurements can be collected from different aspects (e.g., mRNA expression and DNA mutation). These different data types might share some common characteristics and include certain unique properties of data type. New methods are needed to explore the similarity and difference between differential networks estimated from different data types.
In this study, we develop a new differential network inference model which identifies gene network rewiring by combining gene expression and gene mutation data. Similarities and differences between different data types are learned via a group bridge penalty function. Simulation studies have demonstrated that our method consistently outperforms the competing methods. We also apply our method to identify gene network rewiring associated with ovarian cancer platinum resistance from The Cancer Genome Atlas data. There are certain differential edges common to both data types and some differential edges unique to individual data types. Hub genes in the differential networks inferred by our method play important roles in ovarian cancer drug resistance.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Sarkar:2019:NAC, author = "Aisharjya Sarkar and Yuanfang Ren and Rasha Elhesha and Tamer Kahveci", title = "A New Algorithm for Counting Independent Motifs in Probabilistic Networks", journal = j-TCBB, volume = "16", number = "4", pages = "1049--1062", month = jul, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2821666", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Biological networks provide great potential to understand how cells function. Motifs are topological patterns which are repeated frequently in a specific network. Network motifs are key structures through which biological networks operate. However, counting independent (i.e., non-overlapping) instances of a specific motif remains to be a computationally hard problem. Motif counting problem becomes computationally even harder for biological networks as biological interactions are uncertain events.
The main challenge behind this problem is that different embeddings of a given motif in a network can share edges. Such edges can create complex computational dependencies between different instances of the given motif when considering uncertainty of those edges. In this paper, we develop a novel algorithm for counting independent instances of a specific motif topology in probabilistic biological networks. We present a novel mathematical model to capture the dependency between each embedding and all the other embeddings, which it overlaps with. We prove the correctness of this model. We evaluate our model on real and synthetic networks with different probability, and topology models as well as reasonable range of network sizes. Our results demonstrate that our method counts non-overlapping embeddings in practical time for a broad range of networks.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Markin:2019:CMP, author = "Alexey Markin and Oliver Eulenstein", title = "Computing {Manhattan} Path-Difference Median Trees: a Practical Local Search Approach", journal = j-TCBB, volume = "16", number = "4", pages = "1063--1076", month = jul, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2718507", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Median tree problems are powerful tools for inferring large-scale phylogenetic trees that hold enormous promise for society at large. Such problems seek a median tree for a given collection of input trees under some problem-specific distance. Here, we introduce a median tree problem under the classic Manhattan path-difference distance. 
We show that this problem is NP-hard, devise an ILP formulation, and provide an effective local search heuristic that is based on solving a local search problem exactly. Our algorithm for the local search problem improves asymptotically by a factor of $n$ on the best-known na{\"\i}ve solution, where $n$ is the overall number of taxa in the input trees. Finally, comparative phylogenetic studies using considerably large empirical data and an accuracy analysis for smaller phylogenetic trees reveal the ability of our novel heuristic.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Kordi:2019:EAD, author = "Misagh Kordi and Mukul S. Bansal", title = "Exact Algorithms for Duplication-Transfer-Loss Reconciliation with Non-Binary Gene Trees", journal = j-TCBB, volume = "16", number = "4", pages = "1077--1090", month = jul, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2710342", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Duplication-Transfer-Loss (DTL) reconciliation is a powerful method for studying gene family evolution in the presence of horizontal gene transfer. DTL reconciliation seeks to reconcile gene trees with species trees by postulating speciation, duplication, transfer, and loss events. Efficient algorithms exist for finding optimal DTL reconciliations when the gene tree is binary. In practice, however, gene trees are often non-binary due to uncertainty in the gene tree topologies, and DTL reconciliation with non-binary gene trees is known to be NP-hard. In this paper, we present the first exact algorithms for DTL reconciliation with non-binary gene trees.
Specifically, we (i) show that the DTL reconciliation problem for non-binary gene trees is fixed-parameter tractable in the maximum degree of the gene tree, (ii) present an exponential-time, but in-practice efficient, algorithm to track and enumerate all optimal binary resolutions of a non-binary input gene tree, and (iii) apply our algorithms to a large empirical data set of over 4,700 gene trees from 100 species to study the impact of gene tree uncertainty on DTL-reconciliation and to demonstrate the applicability and utility of our algorithms. The new techniques and algorithms introduced in this paper will help biologists avoid incorrect evolutionary inferences caused by gene tree uncertainty.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ghosh:2019:FFS, author = "Priyanka Ghosh and Ananth Kalyanaraman", title = "{FastEtch}: a Fast Sketch-Based Assembler for Genomes", journal = j-TCBB, volume = "16", number = "4", pages = "1091--1106", month = jul, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2737999", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "De novo genome assembly describes the process of reconstructing an unknown genome from a large collection of short or long reads sequenced from the genome. A single run of a Next-Generation Sequencing (NGS) technology can produce billions of short reads, making genome assembly computationally demanding both in terms of memory and time. One of the major computational steps in modern day short read assemblers involves the construction and use of a string data structure called the de Bruijn graph.
In fact, a majority of short read assemblers build the complete de Bruijn graph for the set of input reads, and subsequently traverse and prune low-quality edges, in order to generate genomic ``contigs''---the output of assembly. These steps of graph construction and traversal, contribute to well over 90 percent of the runtime and memory. In this paper, we present a fast algorithm, FastEtch, that uses sketching to build an approximate version of the de Bruijn graph for the purpose of generating an assembly. The algorithm uses Count-Min sketch, which is a probabilistic data structure for streaming data sets. The result is an approximate de Bruijn graph that stores information pertaining only to a selected subset of nodes that are most likely to contribute to the contig generation step. In addition, edges are not stored; instead that fraction which contribute to our contig generation are detected on-the-fly. This approximate approach is intended to significantly improve performance (both execution time and memory footprint) whilst possibly compromising on the output assembly quality. We present two main versions of the assembler---one that generates an assembly, where each contig represents a contiguous genomic region from one strand of the DNA, and another that generates an assembly, where the contigs can straddle either of the two strands of the DNA. For further scalability, we have implemented a multi-threaded parallel code. Experimental results using our algorithm conducted on E. coli, Yeast, C. elegans, and Human Chr2 and Chr2+3 genomes show that our method yields one of the best time-memory-quality trade-offs, when compared against many state-of-the-art genome assemblers.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Choudhury:2019:HAE, author = "Olivia Choudhury and Ankush Chakrabarty and Scott J.
Emrich", title = "Highly Accurate and Efficient Data-Driven Methods for Genotype Imputation", journal = j-TCBB, volume = "16", number = "4", pages = "1107--1116", month = jul, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2708701", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "High-throughput sequencing techniques have generated massive quantities of genotype data. Haplotype phasing has proven to be a useful and effective method for analyzing these data. However, the quality of phasing is undermined due to missing information. Imputation provides an effective means of improving the underlying genotype information. For model organisms, imputation can rely on an available reference genotype panel and a physical or genetic map. For non-model organisms, which often do not have a genotype panel, it is important to design an imputation technique that does not rely on reference data. Here, we present Accurate Data-Driven Imputation Technique (ADDIT), which is composed of two data-driven algorithms capable of handling data generated from model and non-model organisms. The non-model variant of ADDIT (referred to as ADDIT-NM) employs statistical inference methods to impute missing genotypes, whereas the model variant (referred to as ADDIT-M) leverages a supervised learning-based approach for imputation. We demonstrate that both variants of ADDIT are more accurate, faster, and require less memory than leading state-of-the-art imputation tools using model (human) and non-model (maize, apple, and grape) genotype data.
Software Availability: The source code of ADDIT and test data sets are available at https://github.com/NDBL/ADDIT.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Pan:2019:KFP, author = "Tony Pan and Patrick Flick and Chirag Jain and Yongchao Liu and Srinivas Aluru", title = "{Kmerind}: a Flexible Parallel Library for {$K$}-mer Indexing of Biological Sequences on Distributed Memory Systems", journal = j-TCBB, volume = "16", number = "4", pages = "1117--1131", month = jul, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2760829", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Counting and indexing fixed length substrings, or $k$-mers, in biological sequences is a key step in many bioinformatics tasks including genome alignment and mapping, genome assembly, and error correction. While advances in next generation sequencing technologies have dramatically reduced the cost and improved latency and throughput, few bioinformatics tools can efficiently process the datasets at the current generation rate of 1.8 terabases per 3-day experiment from a single sequencer. We present Kmerind, a high performance parallel $k$-mer indexing library for distributed memory environments. The Kmerind library provides a set of simple and consistent APIs with sequential semantics and parallel implementations that are designed to be flexible and extensible. Kmerind's $k$-mer counter performs similarly or better than the best existing $k$-mer counting tools even on shared memory systems. In a distributed memory environment, Kmerind counts $k$-mers in a 120 GB sequence read dataset in less than 13 seconds on 1024 Xeon CPU cores, and fully indexes their positions in approximately 17 seconds. 
Querying for 1 percent of the $k$-mers in these indices can be completed in 0.23 seconds and 28 seconds, respectively. Kmerind is the first $k$-mer indexing library for distributed memory environments, and the first extensible library for general $k$-mer indexing and counting. Kmerind is available at https://github.com/ParBLiSS/kmerind.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Siragusa:2019:LTA, author = "Enrico Siragusa and Niina Haiminen and Filippo Utro and Laxmi Parida", title = "Linear Time Algorithms to Construct Populations Fitting Multiple Constraint Distributions at Genomic Scales", journal = j-TCBB, volume = "16", number = "4", pages = "1132--1142", month = jul, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2760879", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Computer simulations can be used to study population genetic methods, models, and parameters, as well as to predict potential outcomes. For example, in plant populations, predicting the outcome of breeding operations can be studied using simulations. In-silico construction of populations with pre-specified characteristics is an important task in breeding optimization and other population genetic studies. We present two linear time Simulation using Best-fit Algorithms SimBA for two classes of problems where each co-fits two distributions: SimBA-LD fits linkage disequilibrium and minimum allele frequency distributions, while SimBA-hap fits founder-haplotype and polyploid allele dosage distributions. An incremental gap-filling version of previously introduced SimBA-LD is here demonstrated to accurately fit the target distributions, allowing efficient large scale simulations. 
SimBA-hap accuracy and efficiency is demonstrated by simulating tetraploid populations with varying numbers of founder haplotypes, we evaluate both a linear time greedy algorithm and an optimal solution based on mixed-integer programming. SimBA is available on http://researcher.watson.ibm.com/project/5669.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Verma:2019:POC, author = "Deeptak Verma and Gevorg Grigoryan and Chris Bailey-Kellogg", title = "{Pareto} Optimization of Combinatorial Mutagenesis Libraries", journal = j-TCBB, volume = "16", number = "4", pages = "1143--1153", month = jul, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2858794", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In order to increase the hit rate of discovering diverse, beneficial protein variants via high-throughput screening, we have developed a computational method to optimize combinatorial mutagenesis libraries for overall enrichment in two distinct properties of interest. Given scoring functions for evaluating individual variants, POCoM (Pareto Optimal Combinatorial Mutagenesis) scores entire libraries in terms of averages over their constituent members, and designs optimal libraries as sets of mutations whose combinations make the best trade-offs between average scores. This represents the first general-purpose method to directly design combinatorial libraries for multiple objectives characterizing their constituent members. Despite being rigorous in mapping out the Pareto frontier, it is also very fast even for very large libraries (e.g., designing 30 mutation, billion-member libraries in only hours).
We here instantiate POCoM with scores based on a target's protein structure and its homologs' sequences, enabling the design of libraries containing variants balancing these two important yet quite different types of information. We demonstrate POCoM's generality and power in case study applications to green fluorescent protein, cytochrome P450, and $ \beta $-lactamase. Analysis of the POCoM library designs provides insights into the trade-offs between structure- and sequence-based scores, as well as the impacts of experimental constraints on library designs. POCoM libraries incorporate mutations that have previously been found favorable experimentally, while diversifying the contexts in which these mutations are situated and maintaining overall variant quality.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Rasheed:2019:SFU, author = "Muhibur Rasheed and Nathan Clement and Abhishek Bhowmick and Chandrajit L. Bajaj", title = "Statistical Framework for Uncertainty Quantification in Computational Molecular Modeling", journal = j-TCBB, volume = "16", number = "4", pages = "1154--1167", month = jul, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2771240", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "As computational modeling, simulation, and predictions are becoming integral parts of biomedical pipelines, it behooves us to emphasize the reliability of the computational protocol. For any reported quantity of interest QOI, one must also compute and report a measure of the uncertainty or error associated with the QOI. 
This is especially important in molecular modeling, since in most practical applications the inputs to the computational protocol are often noisy, incomplete, or low-resolution. Unfortunately, currently available modeling tools do not account for uncertainties and their effect on the final QOIs with sufficient rigor. We have developed a statistical framework that expresses the uncertainty of the QOI as the probability that the reported value deviates from the true value by more than some user-defined threshold. First, we provide a theoretical approach where this probability can be bounded using Azuma-Hoeffding like inequalities. Second, we approximate this probability empirically by sampling the space of uncertainties of the input and provide applications of our framework to bound uncertainties of several QOIs commonly used in molecular modeling. Finally, we also present several visualization techniques to effectively and quantitatively visualize the uncertainties: in the input, final QOIs, and also intermediate states.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Bai:2019:SGM, author = "Wenruo Bai and Jeffrey Bilmes and William S. Noble", title = "Submodular Generalized Matching for Peptide Identification in Tandem Mass Spectrometry", journal = j-TCBB, volume = "16", number = "4", pages = "1168--1181", month = jul, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2822280", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Motivation: Identification of spectra produced by a shotgun proteomics mass spectrometry experiment is commonly performed by searching the observed spectra against a peptide database.
The heart of this search procedure is a score function that evaluates the quality of a hypothesized match between an observed spectrum and a theoretical spectrum corresponding to a particular peptide sequence. Accordingly, the success of a spectrum analysis pipeline depends critically upon this peptide-spectrum score function. We develop peptide-spectrum score functions that compute the maximum value of a submodular function under $m$ matroid constraints. We call this procedure a submodular generalized matching (SGM) since it generalizes bipartite matching. We use a greedy algorithm to compute maximization, which can achieve a solution whose objective is guaranteed to be at least $ \frac {1}{1 + m}$ of the true optimum. The advantage of the SGM framework is that known long-range properties of experimental spectra can be modeled by designing suitable submodular functions and matroid constraints. Experiments on four data sets from various organisms and mass spectrometry platforms show that the SGM approach leads to significantly improved performance compared to several state-of-the-art methods.
Supplementary information, C++ source code, and data sets can be found at https://melodi-lab.github.io/SGM.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zou:2019:AML, author = "Quan Zou and Qi Liu", title = "Advanced Machine Learning Techniques for Bioinformatics", journal = j-TCBB, volume = "16", number = "4", pages = "1182--1183", month = jul, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2919039", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The papers in this special section focus on the machine learning methods, and applications of these methods to computational biology.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhang:2019:HOC, author = "Qinhu Zhang and Lin Zhu and De-Shuang Huang", title = "High-Order Convolutional Neural Network Architecture for Predicting {DNA}-Protein Binding Sites", journal = j-TCBB, volume = "16", number = "4", pages = "1184--1192", month = jul, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2819660", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Although Deep learning algorithms have outperformed conventional methods in predicting the sequence specificities of DNA-protein binding, they lack to consider the dependencies among nucleotides and the diverse binding lengths for different transcription factors TFs. 
To address the above two limitations simultaneously, in this paper, we propose a high-order convolutional neural network architecture HOCNN, which employs a high-order encoding method to build high-order dependencies among nucleotides, and a multi-scale convolutional layer to capture the motif features of different length. The experimental results on real ChIP-seq datasets show that the proposed method outperforms the state-of-the-art deep learning method DeepBind in the motif discovery task. In addition, we provide further insights about the importance of introducing additional convolutional kernels and the degeneration problem of importing high-order in the motif discovery task.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Li:2019:AIC, author = "Min Li and Zhihui Fei and Min Zeng and Fang-Xiang Wu and Yaohang Li and Yi Pan and Jianxin Wang", title = "Automated {ICD-9} Coding via A Deep Learning Approach", journal = j-TCBB, volume = "16", number = "4", pages = "1193--1202", month = jul, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2817488", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "ICD-9 the Ninth Revision of International Classification of Diseases is widely used to describe a patient's diagnosis. Accurate automated ICD-9 coding is important because manual coding is expensive, time-consuming, and inefficient. Inspired by the recent successes of deep learning, in this study, we present a deep learning framework called DeepLabeler to automatically assign ICD-9 codes. DeepLabeler combines the convolutional neural network with the 'Document to Vector' technique to extract and encode local and global features. 
Our proposed DeepLabeler demonstrates its effectiveness by achieving state-of-the-art performance, i.e., 0.335 micro F-measure on MIMIC-II dataset and 0.408 micro F-measure on MIMIC-III dataset. It outperforms classical hierarchy-based SVM and flat-SVM both on these two datasets by at least 14 percent. Furthermore, we analyze the deep neural network structure to discover the vital elements in the success of DeepLabeler. We find that the convolutional neural network is the most effective component in our network and the 'Document to Vector' technique is also necessary for enhancing classification performance since it extracts well-recognized global features. Extensive experimental results demonstrate that the great promise of deep learning techniques in the field of text multi-label classification and automated medical coding.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Liu:2019:PCP, author = "Bin Liu and Shumin Li", title = "{ProtDet-CCH}: Protein Remote Homology Detection by Combining Long Short-Term Memory and Ranking Methods", journal = j-TCBB, volume = "16", number = "4", pages = "1203--1210", month = jul, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2789880", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "As one of the most challenging tasks in sequence analysis, protein remote homology detection has been extensively studied. Methods based on discriminative models and ranking approaches have achieved the state-of-the-art performance, and these two kinds of methods are complementary. In this study, three LSTM models have been applied to construct the predictors for protein remote homology detection, including ULSTM, BLSTM, and CNN-BLSTM. 
They are able to automatically extract the local and global sequence order information. Combined with PSSMs, the CNN-BLSTM achieved the best performance among the three LSTM-based models. We named this method as CNN-BLSTM-PSSM. Finally, a new method called ProtDet-CCH was proposed by combining CNN-BLSTM-PSSM and a ranking method HHblits. Tested on a widely used SCOP benchmark dataset, ProtDet-CCH achieved an ROC score of 0.998, and an ROC50 score of 0.982, significantly outperforming other existing state-of-the-art methods. Experimental results on two updated SCOPe independent datasets showed that ProtDet-CCH can achieve stable performance. Furthermore, our method can provide useful insights for studying the features and motifs of protein families and superfamilies. It is anticipated that ProtDet-CCH will become a very useful tool for protein remote homology detection.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Liu:2019:CPS, author = "Bingqiang Liu and Ling Han and Xiangrong Liu and Jichang Wu and Qin Ma", title = "Computational Prediction of Sigma-54 Promoters in Bacterial Genomes by Integrating Motif Finding and Machine Learning Strategies", journal = j-TCBB, volume = "16", number = "4", pages = "1211--1218", month = jul, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2816032", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Sigma factor, as a unit of RNA polymerase holoenzyme, is a critical factor in the process of gene transcriptional regulation. It recognizes the specific DNA sites and brings the core enzyme of RNA polymerase to the upstream regions of target genes. 
Therefore, the prediction of the promoters for a particular sigma factor is essential for interpreting functional genomic data and observation. This paper develops a new method to predict sigma-54 promoters in bacterial genomes. The new method organically integrates motif finding and machine learning strategies to capture the intrinsic features of sigma-54 promoters. The experiments on E. coli benchmark test set show that our method has good capability to distinguish sigma-54 promoters from surrounding or randomly selected DNA sequences. The applications of the other three bacterial genomes indicate the potential robustness and applicative power of our method on a large number of bacterial genomes. The source code of our method can be freely downloaded at https://github.com/maqin2001/PromotePredictor.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Liu:2019:TBP, author = "Hongyu Liu and Qinmin Vivian Hu and Liang He", title = "Term-Based Personalization for Feature Selection in Clinical Handover Form Auto-Filling", journal = j-TCBB, volume = "16", number = "4", pages = "1219--1230", month = jul, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2874237", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Feature learning and selection have been widely applied in many research areas because of their good performance and lower complexity. Traditional methods usually treat all terms with same feature sets, such that performance can be damaged when noisy information is brought via wrong features for a given term. In this paper, we propose a term-based personalization approach to finding the best features for each term. 
First, features are given as the input so that we focus on selection strategies. Second, the importance of each feature subset to a given term is evaluated by the term-feature probabilistic relevance model. We present a feature searching method to generate feature candidate subsets for each term, since evaluating all the possible feature subsets is computationally intensive. Finally, we obtain the personalized feature set for each term as a subset of all features. Experiments have been conducted on the NICTA Synthetic Nursing Handover dataset and the results show that our approach is promising and effective.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Su:2019:DMD, author = "Ran Su and Huichen Wu and Bo Xu and Xiaofeng Liu and Leyi Wei", title = "Developing a Multi-Dose Computational Model for Drug-Induced Hepatotoxicity Prediction Based on Toxicogenomics Data", journal = j-TCBB, volume = "16", number = "4", pages = "1231--1239", month = jul, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2858756", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Drug-induced hepatotoxicity may cause acute and chronic liver disease, leading to great concern for patient safety. It is also one of the main reasons for drug withdrawal from the market. Toxicogenomics data has been widely used in hepatotoxicity prediction. In our study, we proposed a multi-dose computational model to predict the drug-induced hepatotoxicity based on gene expression and toxicity data. The dose/concentration information after drug treatment is fully utilized in our study based on the dose-response curve, thus a more informative representative of the dose-response relationship is considered. 
We also proposed a new feature selection method, named MEMO, which is also one important aspect of our multi-dose model in our study, to deal with the high-dimensional toxicogenomics data. We validated the proposed model using the TG-GATEs, which is a large database recording toxicogenomics data from multiple views. The experimental results show that the drug-induced hepatotoxicity can be predicted with high accuracy and efficiency using the proposed predictive model.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Yu:2019:HPB, author = "Liang Yu and Lin Gao", title = "Human Pathway-Based Disease Network", journal = j-TCBB, volume = "16", number = "4", pages = "1240--1249", month = jul, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2774802", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Constructing disease-disease similarity network is important in elucidating the associations between the origin and molecular mechanism of diseases, and in researching disease function and medical research. In this paper, we use a high-quality protein interaction network and a collection of pathway databases to construct a Human Pathway-based Disease Network HPDN to explore the relationship between diseases and their intrinsic interactions. We find that the similarity of two diseases has a strong correlation with the number of their shared functional pathways and the interaction between their related gene sets. Comparing HPDN with disease networks based on genes and symptoms respectively, we find the three networks have high overlap rates. 
Additionally, HPDN can predict new disease-disease correlations, which are supported by Comparative Toxicogenomics Database CTD benchmark and large-scale biomedical literature database. The comprehensive, high-quality relations between diseases based on pathways can further be applied to study important matters in systems medicine, for instance, drug repurposing. Based on a dense subgraph in our network, we find two drugs, prednisone and folic acid, may have new indications, which will provide potential directions for the treatments of complex diseases.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Imani:2019:CGR, author = "Mahdi Imani and Ulisses M. Braga-Neto", title = "Control of Gene Regulatory Networks Using {Bayesian} Inverse Reinforcement Learning", journal = j-TCBB, volume = "16", number = "4", pages = "1250--1261", month = jul, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2830357", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Control of gene regulatory networks GRNs to shift gene expression from undesirable states to desirable ones has received much attention in recent years. Most of the existing methods assume that the cost of intervention at each state and time point, referred to as the immediate cost function, is fully known. 
In this paper, we employ the Partially-Observed Boolean Dynamical System (POBDS) signal model for a time sequence of noisy expression measurement from a Boolean GRN and develop a Bayesian Inverse Reinforcement Learning (BIRL) approach to address the realistic case in which the only available knowledge regarding the immediate cost function is provided by the sequence of measurements and interventions recorded in an experimental setting by an expert. The Boolean Kalman Smoother (BKS) algorithm is used for optimally mapping the available gene-expression data into a sequence of Boolean states, and then the BIRL method is efficiently combined with the Q-learning algorithm for quantification of the immediate cost function. The performance of the proposed methodology is investigated by applying a state-feedback controller to two GRN models: a melanoma WNT5A Boolean network and a p53-MDM2 negative feedback loop Boolean network, when the cost of the undesirable states, and thus the identity of the undesirable genes, is learned using the proposed methodology.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Guo:2019:GES, author = "Feng-Biao Guo and Xinglai Ji and Jian Huang", title = "Guest Editorial for Special Section on the {7th National Conference on Bioinformatics and Systems Biology of China}", journal = j-TCBB, volume = "16", number = "4", pages = "1262--1263", month = jul, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2918969", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The eight papers in this special section were presented at the 7th National Conference on Bioinformatics and Systems Biology of China in 2016.
The conference is the most influential conference of the Chinese scientific community of bioinformatics and systems biology.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wei:2019:FPP, author = "Leyi Wei and Pengwei Xing and Gaotao Shi and Zhiliang Ji and Quan Zou", title = "Fast Prediction of Protein Methylation Sites Using a Sequence-Based Feature Selection Technique", journal = j-TCBB, volume = "16", number = "4", pages = "1264--1273", month = jul, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2670558", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Protein methylation, an important post-translational modification, plays crucial roles in many cellular processes. The accurate prediction of protein methylation sites is fundamentally important for revealing the molecular mechanisms undergoing methylation. In recent years, computational prediction based on machine learning algorithms has emerged as a powerful and robust approach for identifying methylation sites, and much progress has been made in predictive performance improvement. However, the predictive performance of existing methods is not satisfactory in terms of overall accuracy. Motivated by this, we propose a novel random-forest-based predictor called MePred-RF, integrating several discriminative sequence-based feature descriptors and improving feature representation capability using a powerful feature selection technique. Importantly, unlike other methods based on multiple, complex information inputs, our proposed MePred-RF is based on sequence information alone. 
Comparative studies on benchmark datasets via vigorous jackknife tests indicate that our proposed MePred-RF method remarkably outperforms other state-of-the-art predictors, leading by a 4.5 percent average in terms of overall accuracy. A user-friendly webserver that implements the proposed method has been established for researchers' convenience, and is now freely available for public use through http://server.malab.cn/MePred-RF. We anticipate our research tool to be useful for the large-scale prediction and analysis of protein methylation sites.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Lin:2019:IBE, author = "Yan Lin and Fa-Zhan Zhang and Kai Xue and Yi-Zhou Gao and Feng-Biao Guo", title = "Identifying Bacterial Essential Genes Based on a Feature-Integrated Method", journal = j-TCBB, volume = "16", number = "4", pages = "1274--1279", month = jul, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2669968", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Essential genes are those genes of an organism that are considered to be crucial for its survival. Identification of essential genes is therefore of great significance to advance our understanding of the principles of cellular life. We have developed a novel computational method, which can effectively predict bacterial essential genes by extracting and integrating homologous features, protein domain feature, gene intrinsic features, and network topological features. By performing the principal component regression PCR analysis for Escherichia coli MG1655, we established a classification model with the average area under curve AUC value of 0.992 in ten times 5-fold cross-validation tests. 
Furthermore, when employing this new model to a distantly related organism-Streptococcus pneumoniae TIGR4, we still got a reliable AUC value of 0.788. These results indicate that our feature-integrated approach could have practical applications in accurately investigating essential genes from broad bacterial species, and also provide helpful guidelines for the minimal cell.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Shi:2019:CED, author = "Yan Shi and Bolun Zhang and Maolin Cai and Weiqing Xu", title = "Coupling Effect of Double Lungs on a {VCV} Ventilator with Automatic Secretion Clearance Function", journal = j-TCBB, volume = "16", number = "4", pages = "1280--1287", month = jul, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2670079", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "For patients with mechanical ventilation, secretions in airway are harmful and sometimes even mortal, it's of great significance to clear secretion timely and efficiently. In this paper, a new secretion clearance method for VCV volume-controlled ventilation ventilator is put forward, and a secretion clearance system with a VCV ventilator and double lungs is designed. Furthermore, the mathematical model of the secretion clearance system is built and verified via experimental study. 
Finally, to illustrate the influence of key parameters of respiratory system and secretion clearance system on the secretion clearance characteristics, coupling effects of two lungs on VCV secretion clearance system are studied by an orthogonal experiment, it can be obtained that rise of tidal volume adds to efficiency of secretion clearance while effect of area, compliance, and suction pressure on efficiency of secretion clearance needs further study. Rise of compliance improves bottom pressure of secretion clearance while rise of area, tidal volume, and suction pressure decreases bottom pressure of secretion clearance. This paper can be referred to in researches of secretion clearance for VCV.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Li:2019:IFI, author = "Ying Li and Ye He and Siyu Han and Yanchun Liang", title = "Identification and Functional Inference for Tumor-Associated Long Non-Coding {RNA}", journal = j-TCBB, volume = "16", number = "4", pages = "1288--1301", month = jul, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2687442", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Gastric cancer is one of the top leading causes of cancer mortality worldwide especially in China. In recent years, some lncRNAs are discovered to be dysregulated in many cancers. The study on long non-coding RNAs lncRNAs relationship with cancers has attracted increasing attention. The molecular mechanism of gastric cancer remains largely unclear factors, especially for lncRNAs. Experiments are feasible to obtain related information, however, experimental identification of cancer-related lncRNAs usually possesses high time complexity and high cost. 
In this paper, a computational method is proposed to determine the relationship between lncRNA and gastric cancer by reusing the exon-based array of gastric cancer. One specific lncRNAs LINC00365 and its target differentially expressed genes whose products are predicted as blood, urine, or saliva-excretory are identified to be candidates for a combined biomarker for gastric cancer. Further biological function and molecular mechanism of the gastric cancer related lncRNAs and coding gene biomarkers are inferred in terms of multi-source biological knowledge.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhang:2019:CMV, author = "Jun-Ping Zhang and Yi Liu and Wei Sun and Xiao-Yang Zhao and La Ta and Wei-Sheng Guo", title = "Characteristics of Myosin {V} Processivity", journal = j-TCBB, volume = "16", number = "4", pages = "1302--1308", month = jul, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2669311", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Myosin V is a processive doubled-headed biomolecular motor involved in many intracellular organelle and vesicle transport. The unidirectional movement is coupled with the adenosine triphosphate (ATP) hydrolysis and product release cycle. With the progress of experimental techniques and the enhancement of measuring directness, detailed knowledge of the motility of myosin V has been obtained. Following the ATPase cycle, the 4-state mechanochemical model of the myosin V's processive movement is used. The transitions between various states take place in a stochastic manner. We can use the master equation to analyze and calculate quantitatively. Meanwhile, the effect of the reverse reaction is taken fully into account.
We fit the mean velocity, the mean dwell time, the mean run length, and the ratio of forward/backward steps as a function of ATP, ADP, and Pi concentration. The theoretical curves are generally in line with the experimental data. This work provides a new insight for the characteristic of myosin V.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Feng:2019:PAP, author = "Pengmian Feng and Zhenyi Wang and Xiaoyu Yu", title = "Predicting Antimicrobial Peptides by Using Increment of Diversity with Quadratic Discriminant Analysis Method", journal = j-TCBB, volume = "16", number = "4", pages = "1309--1312", month = jul, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2669302", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Antimicrobial peptides are crucial components of the innate host defense system of most living organisms and promising candidates for antimicrobial agents. Accurate classification of antimicrobial peptides will be helpful to the discovery of new therapeutic targets. In this work, the Increment of Diversity with Quadratic Discriminant analysis (IDQD) was presented to classify antifungal and antibacterial peptides based on primary sequence information. In the jackknife test, the proposed IDQD model yields an accuracy of 86.02 percent with the sensitivity of 74.31 percent and specificity of 92.79 percent for identifying antimicrobial peptides, which is superior to other state-of-the-art methods.
This result suggests that the proposed IDQD model can be efficiently used to antimicrobial peptide classification.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Chai:2019:HWT, author = "Guoshi Chai and Min Yu and Lixu Jiang and Yaocong Duan and Jian Huang", title = "{HMMCAS}: a {Web} Tool for the Identification and Domain Annotations of {CAS} Proteins", journal = j-TCBB, volume = "16", number = "4", pages = "1313--1315", month = jul, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2665542", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The CRISPR-Cas clustered regularly interspaced short palindromic repeats-CRISPR-associated proteins adaptive immune systems are discovered in many bacteria and most archaea. These systems are encoded by cas CRISPR-associated operons that have an extremely diverse architecture. The most crucial step in the depiction of cas operons composition is the identification of cas genes or Cas proteins. With the continuous increase of the newly sequenced archaeal and bacterial genomes, the recognition of new Cas proteins is becoming possible, which not only provides candidates for novel genome editing tools but also helps to understand the prokaryotic immune system better. Here, we describe HMMCAS, a web service for the detection of CRISPR-associated structural and functional domains in protein sequences. HMMCAS uses hmmscan similarity search algorithm in HMMER3.1 to provide a fast, interactive service based on a comprehensive collection of hidden Markov models of Cas protein family. 
It can accurately identify the Cas proteins including those fusion proteins, for example the Cas1-Cas4 fusion protein in Candidatus Chloracidobacterium thermophilum B (Cab. thermophilum B). HMMCAS can also find putative cas operon and determine which type it belongs to. HMMCAS is freely available at http://i.uestc.edu.cn/hmmcas.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Lin:2019:ISP, author = "Hao Lin and Zhi-Yong Liang and Hua Tang and Wei Chen", title = "Identifying Sigma70 Promoters with Novel Pseudo Nucleotide Composition", journal = j-TCBB, volume = "16", number = "4", pages = "1316--1321", month = jul, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2666141", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Promoters are DNA regulatory elements located directly upstream or at the 5' end of the transcription initiation site (TSS), which are in charge of gene transcription initiation. With the completion of a large number of microorganism genomics, it is urgent to predict promoters accurately in bacteria by using the computational method. In this work, a sequence-based predictor named ``iPro70-PseZNC'' was designed for identifying sigma70 promoters in prokaryote. In the predictor, the samples of DNA sequences are formulated by a novel pseudo nucleotide composition, called PseZNC, into which the multi-window Z-curve composition and six local DNA structural properties are incorporated. In the 5-fold cross-validation, the area under the curve of receiver operating characteristic of 0.909 was obtained on our benchmark dataset, indicating that the proposed predictor is promising and will provide an important guide in this area.
Further studies showed that the performance of PseZNC is better than that of multi-window Z-curve composition. For the sake of convenience for researchers, a user-friendly online service was established and can be freely accessible at http://lin.uestc.edu.cn/server/iPro70-PseZNC. The PseZNC approach can be also extended to other DNA-related problems.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Ayday:2019:GIW, author = "Erman Ayday and Muhammad Naveed and Haixu Tang", title = "{GenoPri'17: International Workshop on Genome Privacy and Security}", journal = j-TCBB, volume = "16", number = "4", pages = "1322--1323", month = jul, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2891029", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The four papers in this special section were presented at the 4th International Workshop on Genome Privacy and Security (GenoPri) in 2017. This workshop aimed to bring together a highly interdisciplinary community involved in all aspects of genome privacy and security research. This workshop built on its three predecessors, GenoPri'14, GenoPri'15, and GenoPri'16 which were collocated with the Privacy Enhancing Technologies Symposium (PETS), IEEE Symposium on Security and Privacy, and American Medical Informatics Association Annual Fall Symposium (AMIA), respectively.
Over the past several decades, genome sequencing technologies have evolved from slow and expensive systems that were limited in access to a select few scientists and forensics investigators to high-throughput, relatively low-cost tools that are available to consumers.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Senf:2019:EES, author = "Alexander Senf", title = "End-to-End Security for Local and Remote Human Genetic Data Applications at the {EGA}", journal = j-TCBB, volume = "16", number = "4", pages = "1324--1327", month = jul, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2916810", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Sensitive genomic data should remain secure --- whether on disk for storage, or analysis, or in transport. However, secure storage, delivery, and usage of genomic data is complicated by the size of files and diversity of workflows. 
This paper presents solutions developed by GA4GH and EGA to use customized encryption, encrypted file formats, toolchain integration, and intelligent APIs to help solve this problem.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Raisaro:2019:MES, author = "Jean Louis Raisaro and Juan Ramon Troncoso-Pastoriza and Mickael Misbach and Joao Sa Sousa and Sylvain Pradervand and Edoardo Missiaglia and Olivier Michielin and Bryan Ford and Jean-Pierre Hubaux", title = "{MedCo}: Enabling Secure and Privacy-Preserving Exploration of Distributed Clinical and Genomic Data", journal = j-TCBB, volume = "16", number = "4", pages = "1328--1341", month = jul, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2854776", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The increasing number of health-data breaches is creating a complicated environment for medical-data sharing and, consequently, for medical progress. Therefore, the development of new solutions that can reassure clinical sites by enabling privacy-preserving sharing of sensitive medical data in compliance with stringent regulations (e.g., HIPAA, GDPR) is now more urgent than ever. In this work, we introduce MedCo, the first operational system that enables a group of clinical sites to federate and collectively protect their data in order to share them with external investigators without worrying about security and privacy concerns. MedCo uses (a) collective homomorphic encryption to provide trust decentralization and end-to-end confidentiality protection, and (b) obfuscation techniques to achieve formal notions of privacy, such as differential privacy.
A critical feature of MedCo is that it is fully integrated within the i2b2 (Informatics for Integrating Biology and the Bedside) framework, currently used in more than 300 hospitals worldwide. Therefore, it is easily adoptable by clinical sites. We demonstrate MedCo's practicality by testing it on data from The Cancer Genome Atlas in a simulated network of three institutions. Its performance is comparable to the ones of SHRINE (networked i2b2), which, in contrast, does not provide any data protection guarantee.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Baker:2019:PPL, author = "Dixie B. Baker and Bartha M. Knoppers and Mark Phillips and David van Enckevort and Petra Kaufmann and Hanns Lochmuller and Domenica Taruscio", title = "Privacy-Preserving Linkage of Genomic and Clinical Data Sets", journal = j-TCBB, volume = "16", number = "4", pages = "1342--1348", month = jul, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2855125", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The capacity to link records associated with the same individual across data sets is a key challenge for data-driven research. The challenge is exacerbated by the potential inclusion of both genomic and clinical data in data sets that may span multiple legal jurisdictions, and by the need to enable re-identification in limited circumstances. Privacy-Preserving Record Linkage (PPRL) methods address these challenges. In 2016, the Interdisciplinary Committee of the International Rare Diseases Research Consortium (IRDiRC) launched a task team to explore approaches to PPRL.
The task team is a collaboration with the Global Alliance for Genomics and Health GA4GH Regulatory and Ethics and Data Security Work Streams, and aims to prepare policy and technology standards to enable highly reliable linking of records associated with the same individual without disclosing their identity except under conditions in which the use of the data has led to information of importance to the individual's safety or health, and applicable law allows or requires the return of results. The PPRL Task Force has examined the ethico-legal requirements, constraints, and implications of PPRL, and has applied this knowledge to the exploration of technology methods and approaches to PPRL. This paper reports and justifies the findings and recommendations thus far.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Weidman:2019:SIP, author = "Jake Weidman and William Aurite and Jens Grossklags", title = "On Sharing Intentions, and Personal and Interdependent Privacy Considerations for Genetic Data: a Vignette Study", journal = j-TCBB, volume = "16", number = "4", pages = "1349--1361", month = jul, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2854785", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Genetics and genetic data have been the subject of recent scholarly work, with significant attention paid towards understanding consent practices for the acquisition and usage of genetic data as well as genetic data security. 
Attitudes and perceptions concerning the trustworthiness of governmental institutions receiving test-taker data have been explored, with varied findings, but no robust models or deterministic relationships have been established that account for these differences. These results also do not explore in detail the perceptions regarding other types of organizations e.g., private corporations. Further, considerations of privacy interdependence arising from blood relative relationships have been absent from the conversation regarding the sharing of genetic data. This paper reports the results from a factorial vignette survey study in which we investigate how variables of ethnicity, age, genetic markers, and association of data with the individual's name affect the likelihood of sharing data with different types of organizations. We also investigate elements of personal and interdependent privacy concerns. We document the significant role these factors have in the decision to share or not share genetic data. We support our findings with a series of regression analyses.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Mandoiu:2019:GEI, author = "Ion I. 
Mandoiu and Pavel Skums and Alexander Zelikovsky", title = "{Guest Editors}' Introduction to the Special Section on Bioinformatics Research and Applications", journal = j-TCBB, volume = "16", number = "4", pages = "1362--1363", month = jul, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2894215", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The papers in this special section were presented at the 12th International Symposium on Bioinformatics Research and Application ISBRA, which was held at Belarusian State University in Minsk, Belarus on June 5-8, 2016.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Luhmann:2019:SSP, author = "Nina Luhmann and Manuel Lafond and Annelyse Thevenin and Aida Ouangraoua and Roland Wittler and Cedric Chauve", title = "The {SCJ} Small Parsimony Problem for Weighted Gene Adjacencies", journal = j-TCBB, volume = "16", number = "4", pages = "1364--1373", month = jul, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2661761", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Reconstructing ancestral gene orders in a given phylogeny is a classical problem in comparative genomics. 
Most existing methods compare conserved features in extant genomes in the phylogeny to define potential ancestral gene adjacencies, and either try to reconstruct all ancestral genomes under a global evolutionary parsimony criterion, or, focusing on a single ancestral genome, use a scaffolding approach to select a subset of ancestral gene adjacencies, generally aiming at reducing the fragmentation of the reconstructed ancestral genome. In this paper, we describe an exact algorithm for the Small Parsimony Problem that combines both approaches. We consider that gene adjacencies at internal nodes of the species phylogeny are weighted, and we introduce an objective function defined as a convex combination of these weights and the evolutionary cost under the Single-Cut-or-Join SCJ model. The weights of ancestral gene adjacencies can, e.g., be obtained through the recent availability of ancient DNA sequencing data, which provide a direct hint at the genome structure of the considered ancestor, or through probabilistic analysis of gene adjacencies evolution. We show the NP-hardness of our problem variant and propose a Fixed-Parameter Tractable algorithm based on the Sankoff-Rousseau dynamic programming algorithm that also allows to sample co-optimal solutions. We apply our approach to mammalian and bacterial data providing different degrees of complexity. We show that including adjacency weights in the objective has a significant impact in reducing the fragmentation of the reconstructed ancestral gene orders. 
An implementation is available at http://github.com/nluhmann/PhySca.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Markin:2019:ELS, author = "Alexey Markin and Oliver Eulenstein", title = "Efficient Local Search for {Euclidean} Path-Difference Median Trees", journal = j-TCBB, volume = "16", number = "4", pages = "1374--1385", month = jul, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2763137", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Synthesizing large-scale phylogenetic trees is a fundamental problem in evolutionary biology. Median tree problems have evolved as a powerful tool to reconstruct such trees. Such problems seek a median tree for a given collection of input trees under some problem-specific tree distance. There has been an increased interest in the median tree problem for the classical path-difference distance between trees. While this problem is NP-hard, standard local search heuristics have been described that are based on solving a local search problem exactly. For a more effective heuristic we devise a time efficient algorithm for the local search problem that improves on the best-known solution by a factor of $n$, where $n$ is the size of the input trees. Furthermore, we introduce a novel hybrid version of the standard local search that is exploiting our new algorithm for a more refined heuristic search. 
Finally, we demonstrate the performance of our hybrid heuristic in a comparative study with other commonly used methods that synthesize species trees using published empirical data sets.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Li:2019:CRP, author = "Min Li and Peng Ni and Xiaopei Chen and Jianxin Wang and Fang-Xiang Wu and Yi Pan", title = "Construction of Refined Protein Interaction Network for Predicting Essential Proteins", journal = j-TCBB, volume = "16", number = "4", pages = "1386--1397", month = jul, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2665482", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Identification of essential proteins based on protein interaction network PIN is a very important and hot topic in the post genome era. Up to now, a number of network-based essential protein discovery methods have been proposed. Generally, a static protein interaction network was constructed by using the protein-protein interactions obtained from different experiments or databases. Unfortunately, most of the network-based essential protein discovery methods are sensitive to the reliability of the constructed PIN. In this paper, we propose a new method for constructing refined PIN by using gene expression profiles and subcellular location information. The basic idea behind refining the PIN is that two proteins should have higher possibility to physically interact with each other if they appear together at the same subcellular location and are active together at least at a time point in the cell cycle. The original static PIN is denoted by S-PIN while the final PIN refined by our method is denoted by TS-PIN. 
To evaluate whether the constructed TS-PIN is more suitable to be used in the identification of essential proteins, 10 network-based essential protein discovery methods DC, EC, SC, BC, CC, IC, LAC, NC, BN, and DMNC are applied on it to identify essential proteins. A comparison of TS-PIN and two other networks: S-PIN and NF-APIN a noise-filtered active PIN constructed by using gene expression data and S-PIN is implemented on the prediction of essential proteins by using these ten network-based methods. The comparison results show that all of the 10 network-based methods achieve better results when being applied on TS-PIN than that being applied on S-PIN and NF-APIN.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Sergeev:2019:GWA, author = "Roman Sergeevich Sergeev and Ivan S. Kavaliou and Uladzislau V. Sataneuski and Andrei Gabrielian and Alex Rosenthal and Michael Tartakovsky and Alexander V. Tuzikov", title = "Genome-Wide Analysis of {MDR} and {XDR} Tuberculosis from {Belarus}: Machine-Learning Approach", journal = j-TCBB, volume = "16", number = "4", pages = "1398--1408", month = jul, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2720669", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Emergence of drug-resistant microorganisms has been recognized as a serious threat to public health worldwide. This problem is extensively discussed in the context of tuberculosis treatment. Alterations in pathogen genomes are among the main mechanisms by which microorganisms exhibit drug resistance. Analysis of 144 M. tuberculosis strains of different phenotypes including drug susceptible, MDR, and XDR isolated in Belarus was fulfilled in this paper. 
A wide range of machine learning methods that can discover SNPs related to drug-resistance in the whole bacteria genomes was investigated. Besides single-SNP testing approaches, methods that allow detecting joint effects from interacting SNPs were considered. We proposed a framework for automated selection of the best performing statistical model in terms of recall, precision, and accuracy to identify drug resistance-associated mutations. Analysis of whole-genome sequences often leads to situations where the number of treated features exceeds the number of available observations. For this reason, special attention is paid to fair evaluation of the model prediction quality and minimizing the risk of overfitting while estimating the underlying parameters. Results of our experiments aimed at identifying top-scoring resistance mutations to the major first-line and second-line anti-TB drugs are presented.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Shehu:2019:GEA, author = "Amarda Shehu and Giuseppe Pozzi and Tamer Kahveci", title = "Guest Editorial for the {ACM International Conference on Bioinformatics, Computational Biology, and Health Informatics}", journal = j-TCBB, volume = "16", number = "5", pages = "1409--1409", month = sep, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2921083", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:03 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The six papers in this special section were presented at the ACM Conference on Bioinformatics, Computational Biology, and Health Informatics ACM BCB in 2017.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } 
@Article{Bonizzoni:2019:DRI,
  author = "Paola Bonizzoni and Simone Ciccolella and Gianluca {Della Vedova} and Mauricio Soto",
  title = "Does Relaxing the Infinite Sites Assumption Give Better Tumor Phylogenies? {An} {ILP}-Based Comparative Approach",
  journal = j-TCBB,
  volume = "16",
  number = "5",
  pages = "1410--1423",
  month = sep,
  year = "2019",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2018.2865729",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Fri Nov 29 16:39:03 MST 2019",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract = "Most of the evolutionary history reconstruction approaches are based on the infinite sites assumption, which states that mutations appear once in the evolutionary history. The Perfect Phylogeny model is the result of the infinite sites assumption and has been widely used to infer cancer evolution. Nonetheless, recent results show that recurrent and back mutations are present in the evolutionary history of tumors, hence the Perfect Phylogeny model might be too restrictive. We propose an approach that allows losing previously acquired mutations and multiple acquisitions of a character. Moreover, we provide an ILP formulation for the evolutionary tree reconstruction problem. Our formulation allows us to tackle both the Incomplete Directed Phylogeny problem and the Clonal Reconstruction problem when general evolutionary models are considered. The latter problem is fundamental in cancer genomics, the goal is to study the evolutionary history of a tumor considering as input data the fraction of cells having a certain mutation in a set of cancer samples. For the Clonal Reconstruction problem, an experimental analysis shows the advantage of allowing mutation losses. Namely, by analyzing real and simulated datasets, our ILP approach provides a better interpretation of the evolutionary history than a Perfect Phylogeny.
The software is at https://github.com/AlgoLab/gppf.",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Cleary:2019:EFR,
  author = "Alan Cleary and Thiruvarangan Ramaraj and Indika Kahanda and Joann Mudge and Brendan Mumey",
  title = "Exploring Frequented Regions in Pan-Genomic Graphs",
  journal = j-TCBB,
  volume = "16",
  number = "5",
  pages = "1424--1435",
  month = sep,
  year = "2019",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2018.2864564",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Fri Nov 29 16:39:03 MST 2019",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract = "We consider the problem of identifying regions within a pan-genome De Bruijn graph that are traversed by many sequence paths. We define such regions and the subpaths that traverse them as frequented regions (FRs). In this work, we formalize the FR problem and describe an efficient algorithm for finding FRs. Subsequently, we propose some applications of FRs based on machine-learning and pan-genome graph simplification. We demonstrate the effectiveness of these applications using data sets for the organisms Staphylococcus aureus (bacterium) and Saccharomyces cerevisiae (yeast).
We corroborate the biological relevance of FRs such as identifying introgressions in yeast that aid in alcohol tolerance, and show that FRs are useful for classification of yeast strains by industrial use and visualizing pan-genomic space.",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Kwon:2019:EER,
  author = "Sunyoung Kwon and Sungroh Yoon",
  title = "End-to-End Representation Learning for Chemical-Chemical Interaction Prediction",
  journal = j-TCBB,
  volume = "16",
  number = "5",
  pages = "1436--1447",
  month = sep,
  year = "2019",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2018.2864149",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Fri Nov 29 16:39:03 MST 2019",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract = "Chemical-chemical interaction (CCI) plays a major role in predicting candidate drugs, toxicities, therapeutic effects, and biological functions. CCI is typically inferred from a variety of information; however, CCI has yet not been predicted using a learning-based approach. In other drug analyses, deep learning has been actively used in recent years. However, in most cases, deep learning has been used only for classification even though it has feature extraction capabilities. Thus, in this paper, we propose an end-to-end representation learning method for CCI, named DeepCCI, which includes feature extraction and a learning-based approach. Our proposed architecture is based on the Siamese network. Hidden representations are extracted from a simplified molecular input line entry system (SMILES), which is a string notation representing the chemical structure using weight-shared convolutional neural networks. Subsequently, L1 element-wise distances between the two extracted hidden representations are measured.
The performance of DeepCCI is compared with those of 12 fingerprint-method combinations. The proposed DeepCCI shows the best performance in most of the evaluation metrics used. In addition, DeepCCI was experimentally validated to guarantee the commutative property. The automatically extracted features can alleviate the efforts required for manual feature engineering and improve prediction performance.",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Malik:2019:RCS,
  author = "Laraib Malik and Rob Patro",
  title = "Rich Chromatin Structure Prediction from {Hi-C} Data",
  journal = j-TCBB,
  volume = "16",
  number = "5",
  pages = "1448--1458",
  month = sep,
  year = "2019",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2018.2851200",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Fri Nov 29 16:39:03 MST 2019",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract = "Recent studies involving the 3-dimensional conformation of chromatin have revealed the important role it has to play in different processes within the cell. These studies have also led to the discovery of densely interacting segments of the chromosome, called topologically associating domains. The accurate identification of these domains from Hi-C interaction data is an interesting and important computational problem for which numerous methods have been proposed. Unfortunately, most existing algorithms designed to identify these domains assume that they are non-overlapping whereas there is substantial evidence to believe a nested structure exists. We present a methodology to predict hierarchical chromatin domains using chromatin conformation capture data.
Our method predicts domains at different resolutions, calculated using intrinsic properties of the chromatin data, and effectively clusters these to construct the hierarchy. At each individual level, the domains are non-overlapping in such a way that the intra-domain interaction frequencies are maximized. We show that our predicted structure is highly enriched for actively transcribing housekeeping genes and various chromatin markers, including CTCF, around the domain boundaries. We also show that large-scale domains, at multiple resolutions within our hierarchy, are conserved across cell types and species. We also provide comparisons against existing tools for extracting hierarchical domains. Our software, Matryoshka, is written in C++11 and licensed under GPL v3; it is available at https://github.com/COMBINE-lab/matryoshka.",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Markin:2019:CMT,
  author = "Alexey Markin and Oliver Eulenstein",
  title = "Cophenetic Median Trees",
  journal = j-TCBB,
  volume = "16",
  number = "5",
  pages = "1459--1470",
  month = sep,
  year = "2019",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2018.2870173",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Fri Nov 29 16:39:03 MST 2019",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract = "Median tree inference under path-difference metrics has shown great promise for large-scale phylogeny estimation. Similar to these metrics is the family of cophenetic metrics that originates from a classic dendrogram comparison method introduced more than 50 years ago. Despite the appeal of this family of metrics, the problem of computing median trees under cophenetic metrics has not been analyzed. Like other standard median tree problems relevant in practice, as we show here, this problem is also NP-hard.
NP-hard median tree problems have been successfully addressed by local search heuristics that are solving thousands of instances of a corresponding local neighborhood search problem. For the local neighborhood search problem under a cophenetic metric, the best known na{\"\i}ve algorithm has a time complexity that is typically prohibitive for effective heuristic searches. Building on the pioneering work on path-difference median trees, we develop efficient algorithms for Manhattan and Euclidean cophenetic search problems that improve on the na{\"\i}ve solution by a linear and a quadratic factor, respectively. We demonstrate the performance and effectiveness of the resulting heuristic methods in a comparative study using benchmark empirical datasets.",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Zhu:2019:TSS,
  author = "Lu Zhu and Ralf Hofest{\"a}dt and Martin Ester",
  title = "Tissue-Specific Subcellular Localization Prediction Using Multi-Label {Markov} Random Fields",
  journal = j-TCBB,
  volume = "16",
  number = "5",
  pages = "1471--1482",
  month = sep,
  year = "2019",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2019.2897683",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Fri Nov 29 16:39:03 MST 2019",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract = "The understanding of subcellular localization (SCL) of proteins and proteome variation in the different tissues and organs of the human body are two crucial aspects for increasing our knowledge of the dynamic rules of proteins, the cell biology, and the mechanism of diseases. Although there have been tremendous contributions to these two fields independently, the lack of knowledge of the variation of spatial distribution of proteins in the different tissues still exists.
Here, we proposed an approach that allows predicting protein SCL on tissue specificity through the use of tissue-specific functional associations and physical protein-protein interactions (PPIs). We applied our previously developed Bayesian collective Markov random fields (BCMRFs) on tissue-specific protein-protein interaction network (PPI network) for nine types of tissues focusing on eight high-level SCL. The evaluated results demonstrate the strength of our approach in predicting tissue-specific SCL. We identified 1,314 proteins that their SCL were previously proven cell line dependent. We predicted 549 novel tissue-specific localized candidate proteins while some of them were validated via text-mining.",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Le:2019:FPA,
  author = "Thuc Duy Le and Tao Hoang and Jiuyong Li and Lin Liu and Huawen Liu and Shu Hu",
  title = "A Fast {PC} Algorithm for High Dimensional Causal Discovery with Multi-Core {PCs}",
  journal = j-TCBB,
  volume = "16",
  number = "5",
  pages = "1483--1495",
  month = sep,
  year = "2019",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2016.2591526",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Fri Nov 29 16:39:03 MST 2019",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract = "Discovering causal relationships from observational data is a crucial problem and it has applications in many research areas. The PC algorithm is the state-of-the-art constraint based method for causal discovery. However, runtime of the PC algorithm, in the worst-case, is exponential to the number of nodes (variables), and thus it is inefficient when being applied to high dimensional data, e.g., gene expression datasets. On another note, the advancement of computer hardware in the last decade has resulted in the widespread availability of multi-core personal computers. There is a significant motivation for designing a parallelized PC algorithm that is suitable for personal computers and does not require end users' parallel computing knowledge beyond their competency in using the PC algorithm. In this paper, we develop parallel-PC, a fast and memory efficient PC algorithm using the parallel computing technique. We apply our method to a range of synthetic and real-world high dimensional datasets. Experimental results on a dataset from the DREAM 5 challenge show that the original PC algorithm could not produce any results after running more than 24 hours; meanwhile, our parallel-PC algorithm managed to finish within around 12 hours with a 4-core CPU computer, and less than six hours with a 8-core CPU computer. Furthermore, we integrate parallel-PC into a causal inference method for inferring miRNA-mRNA regulatory relationships. The experimental results show that parallel-PC helps improve both the efficiency and accuracy of the causal inference algorithm.",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Meysman:2019:MES,
  author = "Pieter Meysman and Yvan Saeys and Ehsan Sabaghian and Wout Bittremieux and Yves {Van de Peer} and Bart Goethals and Kris Laukens",
  title = "Mining the Enriched Subgraphs for Specific Vertices in a Biological Graph",
  journal = j-TCBB,
  volume = "16",
  number = "5",
  pages = "1496--1507",
  month = sep,
  year = "2019",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2016.2576440",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Fri Nov 29 16:39:03 MST 2019",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract = "In this paper, we present a subgroup discovery method to find subgraphs in a graph that are associated with a given set of vertices. The association between a subgraph pattern and a set of vertices is defined by its significant enrichment based on a Bonferroni-corrected hypergeometric probability value. This interestingness measure requires a dedicated pruning procedure to limit the number of subgraph matches that must be calculated. The presented mining algorithm to find associated subgraph patterns in large graphs is therefore designed to efficiently traverse the search space.
We demonstrate the operation of this method by applying it on three biological graph data sets and show that we can find associated subgraphs for a biologically relevant set of vertices and that the found subgraphs themselves are biologically interesting.",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Jahanshad:2019:MSM,
  author = "Neda Jahanshad and Joshua Faskowitz and Gennady Roshchupkin and Derrek P. Hibar and Boris A. Gutman and Nicholas J. Tustison and Hieab H. H. Adams and Wiro J. Niessen and Meike W. Vernooij and M. Arfan Ikram and Marcel P. Zwiers and Alejandro Arias Vasquez and Barbara Franke and Jennifer L. Kroll and Benson Mwangi and Jair C. Soares and Alex Ing and Sylvane Desrivi{\`e}res and Gunter Schumann and Narelle K. Hansell and Greig I. de Zubicaray and Katie L. McMahon and Nicholas G. Martin and Margaret J. Wright and Paul M. Thompson",
  title = "Multi-Site Meta-Analysis of Morphometry",
  journal = j-TCBB,
  volume = "16",
  number = "5",
  pages = "1508--1514",
  month = sep,
  year = "2019",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2019.2914905",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Fri Nov 29 16:39:03 MST 2019",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract = "Genome-wide association studies (GWAS) link full genome data to a handful of traits. However, in neuroimaging studies, there is an almost unlimited number of traits that can be extracted for full image-wide big data analyses. Large populations are needed to achieve the necessary power to detect statistically significant effects, emphasizing the need to pool data across multiple studies. Neuroimaging consortia, e.g., ENIGMA and CHARGE, are now analyzing MRI data from over 30,000 individuals.
Distributed processing protocols extract harmonized features at each site, and pool together only the cohort statistics using meta analysis to avoid data sharing. To date, such MRI projects have focused on single measures such as hippocampal volume, yet voxelwise analyses (e.g., tensor-based morphometry; TBM) may help better localize statistical effects. This can lead to $ 10^{13} $ tests for GWAS and become underpowered. We developed an analytical framework for multi-site TBM by performing multi-channel registration to cohort-specific templates. Our results highlight the reliability of the method and the added power over alternative options while preserving single site specificity and opening the doors for well-powered image-wide genome-wide discoveries.",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Mirzaei:2019:PSP,
  author = "Shokoufeh Mirzaei and Tomer Sidi and Chen Keasar and Silvia Crivelli",
  title = "Purely Structural Protein Scoring Functions Using Support Vector Machine and Ensemble Learning",
  journal = j-TCBB,
  volume = "16",
  number = "5",
  pages = "1515--1523",
  month = sep,
  year = "2019",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2016.2602269",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Fri Nov 29 16:39:03 MST 2019",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract = "The function of a protein is determined by its structure, which creates a need for efficient methods of protein structure determination to advance scientific and medical research. Because current experimental structure determination methods carry a high price tag, computational predictions are highly desirable. Given a protein sequence, computational methods produce numerous 3D structures known as decoys.
Selection of the best quality decoys is both challenging and essential as the end users can handle only a few ones. Therefore, scoring functions are central to decoy selection. They combine measurable features into a single number indicator of decoy quality. Unfortunately, current scoring functions do not consistently select the best decoys. Machine learning techniques offer great potential to improve decoy scoring. This paper presents two machine-learning based scoring functions to predict the quality of proteins structures, i.e., the similarity between the predicted structure and the experimental one without knowing the latter. We use different metrics to compare these scoring functions against three state-of-the-art scores. This is a first attempt at comparing different scoring functions using the same non-redundant dataset for training and testing and the same features. The results show that adding informative features may be more significant than the method used.",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Singh:2019:TSK,
  author = "Ritambhara Singh and Jack Lanchantin and Gabriel Robins and Yanjun Qi",
  title = "Transfer String Kernel for Cross-Context {DNA}--Protein Binding Prediction",
  journal = j-TCBB,
  volume = "16",
  number = "5",
  pages = "1524--1536",
  month = sep,
  year = "2019",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2016.2609918",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Fri Nov 29 16:39:03 MST 2019",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract = "Through sequence-based classification, this paper tries to accurately predict the DNA binding sites of transcription factors (TFs) in an unannotated cellular context.
Related methods in the literature fail to perform such predictions accurately, since they do not consider sample distribution shift of sequence segments from an annotated source context to an unannotated target context. We, therefore, propose a method called ``Transfer String Kernel'' (TSK) that achieves improved prediction of transcription factor binding site (TFBS) using knowledge transfer via cross-context sample adaptation. TSK maps sequence segments to a high-dimensional feature space using a discriminative mismatch string kernel framework. In this high-dimensional space, labeled examples of the source context are re-weighted so that the revised sample distribution matches the target context more closely. We have experimentally verified TSK for TFBS identifications on 14 different TFs under a cross-organism setting. We find that TSK consistently outperforms the state-of-the-art TFBS tools, especially when working with TFs whose binding sequences are not conserved across contexts. We also demonstrate the generalizability of TSK by showing its cutting-edge performance on a different set of cross-context tasks for the MHC peptide binding predictions.",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Saha:2019:DFM,
  author = "Tanay Kumar Saha and Ataur Katebi and Wajdi Dhifli and Mohammad {Al Hasan}",
  title = "Discovery of Functional Motifs from the Interface Region of Oligomeric Proteins Using Frequent Subgraph Mining",
  journal = j-TCBB,
  volume = "16",
  number = "5",
  pages = "1537--1549",
  month = sep,
  year = "2019",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2017.2756879",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Fri Nov 29 16:39:03 MST 2019",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract = "Modeling the interface region of a protein complex paves the way for understanding its dynamics and functionalities. Existing works model the interface region of a complex by using different approaches, such as, the residue composition at the interface region, the geometry of the interface residues, or the structural alignment of interface regions. These approaches are useful for ranking a set of docked conformation or for building scoring function for protein-protein docking, but they do not provide a generic and scalable technique for the extraction of interface patterns leading to functional motif discovery. In this work, we model the interface region of a protein complex by graphs and extract interface patterns of the given complex in the form of frequent subgraphs. To achieve this, we develop a scalable algorithm for frequent subgraph mining. We show that a systematic review of the mined subgraphs provides an effective method for the discovery of functional motifs that exist along the interface region of a given protein complex. In our experiments, we use three PDB protein structure datasets. The first two datasets are composed of PDB structures from different conformations of two dimeric protein complexes: HIV-1 protease (329 structures), and triosephosphate isomerase (TIM) (86 structures). The third dataset is a collection of different enzyme structures protein structures from the six top-level enzyme classes, namely: Oxydoreductase, Transferase, Hydrolase, Lyase, Isomerase, and Ligase. We show that for the first two datasets, our method captures the locking mechanism at the dimeric interface by taking into account the spatial positioning of the interfacial residues through graphs. Indeed, our frequent subgraph mining based approach discovers the patterns representing the dimerization lock which is formed at the base of the structure in 323 of the 329 HIV-1 protease structures. Similarly, for 86 TIM structures, our approach discovers the dimerization lock formation in 50 structures. For the enzyme structures, we show that we are able to capture the functional motifs (active sites) that are specific to each of the six top-level classes of enzymes through frequent subgraphs.",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Frasca:2019:MPF,
  author = "Marco Frasca and Nicol{\`o} Cesa-Bianchi",
  title = "Multitask Protein Function Prediction through Task Dissimilarity",
  journal = j-TCBB,
  volume = "16",
  number = "5",
  pages = "1550--1560",
  month = sep,
  year = "2019",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2017.2684127",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Fri Nov 29 16:39:03 MST 2019",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract = "Automated protein function prediction is a challenging problem with distinctive features, such as the hierarchical organization of protein functions and the scarcity of annotated proteins for most biological functions. We propose a multitask learning algorithm addressing both issues. Unlike standard multitask algorithms, which use task (protein functions) similarity information as a bias to speed up learning, we show that dissimilarity information enforces separation of rare class labels from frequent class labels, and for this reason is better suited for solving unbalanced protein function prediction problems. We support our claim by showing that a multitask extension of the label propagation algorithm empirically works best when the task relatedness information is represented using a dissimilarity matrix as opposed to a similarity matrix.
Moreover, the experimental comparison carried out on three model organism shows that our method has a more stable performance in both ``protein-centric'' and ``function-centric'' evaluation settings.",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Feret:2019:E,
  author = "J{\'e}r{\^o}me Feret and Heinz Koeppl",
  title = "Editorial",
  journal = j-TCBB,
  volume = "16",
  number = "5",
  pages = "1561--1561",
  month = sep,
  year = "2019",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2019.2934374",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Fri Nov 29 16:39:03 MST 2019",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract = "Presents the introductory editorial for this issue of the publication.",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Harmer:2019:BCC,
  author = "Russ Harmer and Yves-Stan {Le Cornec} and S{\'e}bastien L{\'e}gar{\'e} and Eugenia Oshurko",
  title = "Bio-Curation for Cellular Signalling: The {KAMI Project}",
  journal = j-TCBB,
  volume = "16",
  number = "5",
  pages = "1562--1573",
  month = sep,
  year = "2019",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2019.2906164",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Fri Nov 29 16:39:03 MST 2019",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract = "The general question of what constitutes bio-curation for rule-based modelling of cellular signalling is posed. A general approach to the problem is presented, based on rewriting in hierarchies of graphs, together with a specific instantiation of the methodology that addresses our particular bio-curation problem.
The current state of the ongoing development of the KAMI bio-curation tool, based on this approach, is outlined along with our plans for future development.",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Biane:2019:CRB,
  author = "C{\'e}lia Biane and Franck Delaplace",
  title = "Causal Reasoning on {Boolean} Control Networks Based on Abduction: Theory and Application to Cancer Drug Discovery",
  journal = j-TCBB,
  volume = "16",
  number = "5",
  pages = "1574--1585",
  month = sep,
  year = "2019",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2018.2889102",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Fri Nov 29 16:39:03 MST 2019",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract = "Complex diseases such as Cancer or Alzheimer's are caused by multiple molecular perturbations leading to pathological cellular behavior. However, the identification of disease-induced molecular perturbations and subsequent development of efficient therapies are challenged by the complexity of the genotype-phenotype relationship. Accordingly, a key issue is to develop frameworks relating molecular perturbations and drug effects to their consequences on cellular phenotypes. Such framework would aim at identifying the sets of causal molecular factors leading to phenotypic reprogramming. In this article, we propose a theoretical framework, called Boolean Control Networks, where disease-induced molecular perturbations and drug actions are seen as topological perturbations/actions on molecular networks leading to cell phenotype reprogramming. We present a new method using abductive reasoning principles inferring the minimal causal topological actions leading to an expected behavior at stable state.
Then, we compare different implementations of the algorithm and finally, show a proof-of-concept of the approach on a model of network regulating the proliferation/apoptosis switch in breast cancer by automatically discovering driver genes and their synthetic lethal drug target partner.",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Abbas:2019:QRE,
  author = "Houssam Abbas and Alena Rodionova and Konstantinos Mamouras and Ezio Bartocci and Scott A. Smolka and Radu Grosu",
  title = "Quantitative Regular Expressions for Arrhythmia Detection",
  journal = j-TCBB,
  volume = "16",
  number = "5",
  pages = "1586--1597",
  month = sep,
  year = "2019",
  CODEN = "ITCBCY",
  DOI = "https://doi.org/10.1109/TCBB.2018.2885274",
  ISSN = "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L = "1545-5963",
  bibdate = "Fri Nov 29 16:39:03 MST 2019",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract = "Implantable medical devices are safety-critical systems whose incorrect operation can jeopardize a patient's health, and whose algorithms must meet tight platform constraints like memory consumption and runtime. In particular, we consider here the case of implantable cardioverter defibrillators, where peak detection algorithms and various others discrimination algorithms serve to distinguish fatal from non-fatal arrhythmias in a cardiac signal. Motivated by the need for powerful formal methods to reason about the performance of arrhythmia detection algorithms, we show how to specify all these algorithms using Quantitative Regular Expressions (QREs). QRE is a formal language to express complex numerical queries over data streams, with provable runtime and memory consumption guarantees.
We show that QREs are more suitable than classical temporal logics to express in a concise and easy way a range of peak detectors in both the time and wavelet domains and various discriminators at the heart of today's arrhythmia detection devices. The proposed formalization also opens the way to formal analysis and rigorous testing of these detectors' correctness and performance, alleviating the regulatory burden on device developers when modifying their algorithms. We demonstrate the effectiveness of our approach by executing QRE-based monitors on real patient data on which they yield results on par with the results reported in the medical literature.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Luck:2019:HMM, author = "Alexander Luck and Pascal Giehr and Karl Nordstrom and Jorn Walter and Verena Wolf", title = "Hidden {Markov} Modelling Reveals Neighborhood Dependence of {Dnmt3a} and 3b Activity", journal = j-TCBB, volume = "16", number = "5", pages = "1598--1609", month = sep, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2910814", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:03 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "DNA methylation is an epigenetic mark whose important role in development has been widely recognized. This epigenetic modification results in heritable information not encoded by the DNA sequence. The underlying mechanisms controlling DNA methylation are only partly understood. Several mechanistic models of enzyme activities responsible for DNA methylation have been proposed. Here, we extend existing Hidden Markov Models HMMs for DNA methylation by describing the occurrence of spatial methylation patterns over time and propose several models with different neighborhood dependences. 
Furthermore, we investigate correlations between the neighborhood dependence and other genomic information. We perform numerical analysis of the HMMs applied to comprehensive hairpin and non-hairpin bisulfite sequencing measurements and accurately predict wild-type data. We find evidence that the activities of Dnmt3a and Dnmt3b responsible for de novo methylation depend on 5' left but not on 3' right neighboring CpGs in a sequencing string.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Mandon:2019:ASR, author = "Hugues Mandon and Cui Su and Jun Pang and Soumya Paul and Stefan Haar and Loic Pauleve", title = "Algorithms for the Sequential Reprogramming of {Boolean} Networks", journal = j-TCBB, volume = "16", number = "5", pages = "1610--1619", month = sep, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2914383", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:03 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Cellular reprogramming, a technique that opens huge opportunities in modern and regenerative medicine, heavily relies on identifying key genes to perturb. Most of the existing computational methods for controlling which attractor steady state the cell will reach focus on finding mutations to apply to the initial state. However, it has been shown, and is proved in this article, that waiting between perturbations so that the update dynamics of the system prepares the ground, allows for new reprogramming strategies. To identify such sequential perturbations, we consider a qualitative model of regulatory networks, and rely on Binary Decision Diagrams to model their dynamics and the putative perturbations. 
Our method establishes a set identification of sequential perturbations, whether permanent mutations or only temporary, to achieve the existential or inevitable reachability of an arbitrary state of the system. We apply an implementation for temporary perturbations on models from the literature, illustrating that we are able to derive sequential perturbations to achieve trans-differentiation.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Kapilevich:2019:CRC, author = "Viacheslav Kapilevich and Shigeto Seno and Hideo Matsuda and Yoichi Takenaka", title = "Chromatin {$3$D} Reconstruction from Chromosomal Contacts Using a Genetic Algorithm", journal = j-TCBB, volume = "16", number = "5", pages = "1620--1626", month = sep, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2814995", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:03 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Recent epigenetics research has demonstrated that chromatin conformation plays an important role in various aspects of gene regulation. Chromosome Conformation Capture 3C technology makes it possible to analyze the spatial organization of chromatin in a cell. Several algorithms for three-dimensional reconstruction of chromatin structure from 3C experimental data have been proposed. Compared to other algorithms, ShRec3D, one of the most advanced algorithms, can reconstruct a chromatin model in the shortest time for high-resolution whole-genome experimental data. However, ShRec3D employs a graph shortest path algorithm, which introduces errors in the resulting model. We propose an improved algorithm that optimizes shortest path distances using a genetic algorithm approach. 
The proposed algorithm and ShRec3D were compared using in silico 3C experimental data. Compared to ShRec3D, the proposed algorithm demonstrated significant improvement relative to the similarity between the algorithm's output and the original model with a reasonable increase to calculation time.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Jhee:2019:CSC, author = "Jong Ho Jhee and Sunjoo Bang and Dong-gi Lee and Hyunjung Shin", title = "Comorbidity Scoring with Causal Disease Networks", journal = j-TCBB, volume = "16", number = "5", pages = "1627--1634", month = sep, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2812886", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:03 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In recent years, there has been numerous studies constructing a disease network with diverse sources of data. Many researchers attempted to extend the usage of the disease network by employing machine learning algorithms on various problems such as prediction of comorbidity. The relations between diseases can further be specified into causal relations. When causality is laid on the edges in the network, prediction for comorbid diseases can be more improved. However, not many machine learning algorithms have been developed to concern causality. In this study, we exploit a network based machine learning algorithm that generates comorbidity scores from a causal disease network. In order to find comorbid diseases, semi-supervised scoring for causal networks is proposed. It computes scores of entire nodes in the network when a specific node is labeled. Each score is calculated one at a time and affects to the others along causal edges. The algorithm iterates until it converges. 
We compared the scoring results of the causal disease network and those of simple association network. As a gold standard, we referenced the values of relative risk from prevalence database, HuDiNe. Scoring by the proposed method provides clearer distinguishability between the top-ranked diseases in the comorbidity list. This is a benefit because it allows the choosing of the most significant ones on an easier fashion. To present typical use of the resulting list, comorbid diseases of Huntington disease and pneumonia are validated via PubMed literature, respectively.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Li:2019:CIC, author = "Donghe Li and Wonji Kim and Longfei Wang and Kyong-Ah Yoon and Boyoung Park and Charny Park and Sun-Young Kong and Yongdeuk Hwang and Daehyun Baek and Eun Sook Lee and Sungho Won", title = "Comparison of {INDEL} Calling Tools with Simulation Data and Real Short-Read Data", journal = j-TCBB, volume = "16", number = "5", pages = "1635--1644", month = sep, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2854793", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:03 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Insertions and deletions (INDELs) comprise a significant proportion of human genetic variation, and recent papers have revealed that many human diseases may be attributable to INDELs. With the development of next-generation sequencing (NGS) technology, many statistical/computational tools have been developed for calling INDELs. However, there are differences among those tools, and comparisons among them have been limited. 
In order to better understand these inter-tool differences, five popular and publicly available INDEL calling tools---GATK HaplotypeCaller, Platypus, VarScan2, Scalpel, and GotCloud---were evaluated using simulation data, 1000 Genomes Project data, and family-based sequencing data. The accuracy of INDEL calling by each tool was mainly evaluated by concordance rates. Family-based sequencing data, which consisted of 49 individuals from eight Korean families, were used to calculate Mendelian error rates. Our comparison results show that GATK HaplotypeCaller usually performs the best and that joint calling with Platypus can lead to additional improvements in accuracy. The result of this study provides important information regarding future directions for the variant detection and the algorithms development.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Nishida:2019:EEP, author = "Shimpei Nishida and Shun Sakuraba and Kiyoshi Asai and Michiaki Hamada", title = "Estimating Energy Parameters for {RNA} Secondary Structure Predictions Using Both Experimental and Computational Data", journal = j-TCBB, volume = "16", number = "5", pages = "1645--1655", month = sep, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2813388", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:03 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Computational RNA secondary structure prediction depends on a large number of nearest-neighbor free-energy parameters, including 10 parameters for Watson--Crick stacked base pairs that were estimated from experimental measurements of the free energies of 90 RNA duplexes. These experimental data are provided by time-consuming and cost-intensive experiments. 
In contrast, various modified nucleotides in RNAs, which would affect not only their structures but also functions, have been found, and rapid determination of energy parameters for a such modified nucleotides is needed. To reduce the high cost of determining energy parameters, we propose a novel method to estimate energy parameters from both experimental and computational data, where the computational data are provided by a recently developed molecular dynamics simulation protocol. We evaluate our method for Watson--Crick stacked base pairs, and show that parameters estimated from 10 experimental data items and 10 computational data items can predict RNA secondary structures with accuracy comparable to that using conventional parameters. The results indicate that the combination of experimental free-energy measurements and molecular dynamics simulations is capable of estimating the thermodynamic properties of RNA secondary structures at lower cost.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Rhee:2019:ILC, author = "Je-Keun Rhee and Jinseon Yoo and Kyu Ryung Kim and Jeeyoon Kim and Yong-Jae Lee and Byoung Chul Cho and Tae-Min Kim", title = "Identification of Local Clusters of Mutation Hotspots in Cancer-Related Genes and Their Biological Relevance", journal = j-TCBB, volume = "16", number = "5", pages = "1656--1662", month = sep, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2813375", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:03 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Mutation hotspots are either solitary amino acid residues or stretches of amino acids that show elevated mutation frequency in cancer-related genes, but their prevalence and biological relevance are not completely understood. 
Here, we developed a Smith-Waterman algorithm-based mutation hotspot discovery method, MutClustSW, to identify mutation hotspots of either single or clustered amino acid residues. We identified 181 missense mutation hotspots from COSMIC and TCGA mutation databases. In addition to 77 single amino acid residue hotspots (42.5 percent) including well-known mutation hotspots such as IDH1 p.R132 and BRAF p.V600, we identified 104 mutation hotspots (57.5 percent) as clusters or stretches of multiple amino acids, and the hotspots on MUC2, EPPK1, KMT2C, and TP53 were larger than 50 amino acids. Twelve of 27 nonsense mutation hotspots (44.4 percent) were observed in four cancer-related genes, TP53, ARID1A, CDKN2A, and PTEN, suggesting that truncating mutations on some tumor suppressor genes are not randomly distributed as previously assumed. We also show that hotspot mutations have higher mutation allele frequency than non-hotspots, and the hotspot information can be used to prioritize the cancer drivers. 
Together, the proposed algorithm and the mutation hotspot information can serve as valuable resources in the selection of functional driver mutations and associated genes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Rampogu:2019:INS, author = "Shailima Rampogu and Ayoung Baek and Rohit Bavi and Minky Son and Guang Ping Cao and Raj Kumar and Chanin Park and Amir Zeb and Rabia Mukthar Rana and Seok Ju Park and Keun Woo Lee", title = "Identification of Novel Scaffolds with Dual Role as Antiepileptic and Anti-Breast Cancer", journal = j-TCBB, volume = "16", number = "5", pages = "1663--1674", month = sep, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2855138", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:03 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Aromatase inhibitors with an $ \mathrm {IC}_{50} $ value ranging from 1.4 to 49.7 $ \mu $M are known to act as antiepileptic drugs besides being potential breast cancer inhibitors. The aim of the present study is to identify novel antiepileptic aromatase inhibitors with higher activity exploiting the ligand-based pharmacophore approach utilizing the experimentally known inhibitors. The resultant Hypo1 consists of four features and was further validated by using three different strategies. Hypo1 was allowed to screen different databases to identify lead molecules and were further subjected to Lipinski's Rule of Five and ADMET to establish their drug-like properties. Consequently, the obtained 68-screened molecules were subjected to molecular docking by GOLD v5.2.2. Furthermore, the compounds with the highest dock scores were assessed for molecular interactions. 
Later, the MD simulation was applied to evaluate the protein backbone stabilities and binding energies adapting GROMACS v5.0.6 and MM/PBSA which was followed by the density functional theory DFT, to analyze their orbital energies, and further the energy gap between them. Eventually, the number of Hit molecules was culled to three projecting Hit1, Hit2, and Hit3 as the potential lead compounds based on their highest dock scores, hydrogen bond interaction, lowest energy gap, and the least binding energies and stable MD results.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Sudo:2019:SWM, author = "Hiroki Sudo and Masanobu Jimbo and Koji Nuida and Kana Shimizu", title = "Secure Wavelet Matrix: Alphabet-Friendly Privacy-Preserving String Search for Bioinformatics", journal = j-TCBB, volume = "16", number = "5", pages = "1675--1684", month = sep, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2814039", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:03 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Biomedical data often includes personal information, and the technology is demanded that enables the searching of such sensitive data while protecting privacy. We consider a case in which a server has a text database and a user searches the database to find substring matches. The user wants to conceal his/her query and the server wants to conceal the database except for the search results. The previous approach for this problem is based on a linear-time algorithm in terms of alphabet size $ \mathbf {| \Sigma |} $, and it cannot search on the database of large alphabet such as biomedical documents. We present a novel algorithm that can search a string in logarithmic time of $ \mathbf {| \Sigma |} $. 
In our algorithm, named secure wavelet matrix sWM, we use an additively homomorphic encryption to build an efficient data structure called a wavelet matrix. In an experiment using a simulated string of length 10,000 whose alphabet size ranges from 4 to 1024, the run time of the sWM was up to around two orders of magnitude faster than that of the previous method. sWM enables the searching of a private database efficiently and thus it will facilitate utilizing sensitive biomedical information.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Kim:2019:SDG, author = "Man-Sun Kim and Dongsan Kim and Jeong-Rae Kim", title = "Stage-Dependent Gene Expression Profiling in Colorectal Cancer", journal = j-TCBB, volume = "16", number = "5", pages = "1685--1692", month = sep, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2814043", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:03 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Temporal gene expression profiles have been widely considered to uncover the mechanism of cancer development and progression. Gene expression patterns, however, have been analyzed for limited stages with small samples, without proper data pre-processing, in many cases. With those approaches, it is difficult to unveil the mechanism of cancer development over time. In this study, we analyzed gene expression profiles of two independent colorectal cancer sample datasets, each of which contained 556 and 566 samples, respectively. To find specific gene expression changes according to cancer stage, we applied the linear mixed-effect regression model LMER that controls other clinical variables. 
Based on this methodology, we found two types of gene expression patterns: continuously increasing and decreasing genes as cancer develops. We found that continuously increasing genes are related to the nervous and developmental system, whereas the others are related to the cell cycle and metabolic processes. We further analyzed connected sub-networks related to the two types of genes. From these results, we suggest that the gene expression profile analysis can be used to understand underlying the mechanisms of cancer development such as cancer growth and metastasis. Furthermore, our approach can provide a good guideline for advancing our understanding of cancer developmental processes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Moon:2019:SIC, author = "SeongRyeol Moon and Curt Balch and Sungjin Park and Jinhyuk Lee and Jiyong Sung and Seungyoon Nam", title = "Systematic Inspection of the Clinical Relevance of {TP53} Missense Mutations in Gastric Cancer", journal = j-TCBB, volume = "16", number = "5", pages = "1693--1701", month = sep, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2814049", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:03 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The ``guardian of the genome,'' TP53, is one of the most frequently mutated genes of all cancers. Despite the important biological roles of TP53, the clinical relevance of TP53 mutations, in gastric cancer GC, remains largely unknown. Here, we systematically assessed clinical relevance, in terms of TP53 mutation positions, finding substantial variability. Thus, we hypothesized that the position of the TP53 mutation might affect clinical outcomes in GC. 
We systematically inspected missense mutations in TP53, from a TCGA (The Cancer Genome Atlas) GC dataset in UCSC Xena repository. Specifically, we examined five aspects of each mutational position: (1) the whole gene body; (2) known hot-spots; (3) the DNA-binding domain; (4) the secondary structure of the domain; and (5) individual mutation positions. We then analyzed the clinical outcomes for each aspect. These results showed that, in terms of secondary structure, patients with mutations in turn regions showed poor prognosis, compared to those with mutations in beta strand regions (log rank $ p = 0.043 $). Also, in terms of individual mutation positions, patients having mutations at R248 showed poorer survival than other patients having mutations at different TP53 positions (log rank $ p = 0.035 $).", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Hao:2019:ASS, author = "FanChang Hao and Melvin Zhang and Hon Wai Leong", title = "A $2$-Approximation Scheme for Sorting Signed Permutations by Reversals, Transpositions, Transreversals, and Block-Interchanges", journal = j-TCBB, volume = "16", number = "5", pages = "1702--1711", month = sep, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2719681", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:03 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "We consider the problem of sorting signed permutations by reversals, transpositions, transreversals, and block-interchanges and give a 2-approximation scheme, called the GSB (Genome Sorting by Bridges) scheme. 
Our result extends 2-approximation algorithm of He and Chen [12] that allowed only reversals and block-interchanges, and also the 1.5 approximation algorithm of Hartman and Sharan [11] that allowed only transreversals and transpositions. We prove this result by introducing three bridge structures in the breakpoint graph, namely, the L-bridge, T-bridge, and X-bridge and show that they model ``proper'' reversals, transpositions, transreversals, and block-interchanges, respectively. We show that we can always find at least one of these three bridges in any breakpoint graph, thus giving an upper bound on the number of operations needed. We prove a lower bound on the distance and use it to show that GSB has a 2-approximation ratio. An $ O(n^3) $ algorithm called GSB-I that is based on the GSB approximation scheme presented in this paper has recently been published by Yu, Hao, and Leong in [17]. We note that our 2-approximation scheme admits many possible implementations by varying the order we search for proper rearrangement operations.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Li:2019:DTP, author = "Limin Li and Menglan Cai", title = "Drug Target Prediction by Multi-View Low Rank Embedding", journal = j-TCBB, volume = "16", number = "5", pages = "1712--1721", month = sep, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2706267", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:03 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Drug repositioning has been a key problem in drug development, and heterogeneous data sources are used to predict drug-target interactions by different approaches. However, most of studies focus on a single representation of drugs or proteins. 
It has been shown that integrating multi-view representations of drugs and proteins can strengthen the prediction ability. For example, a drug can be represented by its chemical structure, or by its chemical response in different cells. A protein can be represented by its sequence, or by its gene expression values in different cells. The docking of drugs and proteins based on their structure can be considered as one view structural view, and the chemical performance of them based on gene expression and drug response can be considered as another view chemical view. In this work, we first propose a single-view approach of SLRE based on low rank embedding for an arbitrary view, and then extend it to a multi-view approach of MLRE, which could integrate both views. Our experiments show that our methods perform significantly better than baseline methods including single-view methods and multi-view methods. We finally report predicted drug-target interactions for 30 FDA-approved drugs.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Xie:2019:EEM, author = "Wen-Bin Xie and Hong Yan and Xing-Ming Zhao", title = "{EmDL}: Extracting {miRNA}--Drug Interactions from Literature", journal = j-TCBB, volume = "16", number = "5", pages = "1722--1728", month = sep, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2723394", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:03 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The microRNAs miRNAs, regulators of post-transcriptional processes, have been found to affect the efficacy of drugs by regulating the biological processes in which the target proteins of drugs may be involved. For example, some drugs develop resistance when certain miRNAs are overexpressed. 
Therefore, identifying miRNAs that affect drug effects can help understand the mechanisms of drug actions and design more efficient drugs. Although some computational approaches have been developed to predict miRNA-drug associations, such associations rarely provide explicit information about which miRNAs and how they affect drug efficacy. On the other hand, there are rich information about which miRNAs affect the efficacy of which drugs in the literature. In this paper, we present a novel text mining approach, named as EmDL Extracting miRNA-Drug interactions from Literature, to extract the relationships of miRNAs affecting drug efficacy from literature. Benchmarking on the drug-miRNA interactions manually extracted from MEDLINE and PubMed Central, EmDL outperforms traditional text mining approaches as well as other popular methods for predicting drug-miRNA associations. Specifically, EmDL can effectively identify the sentences that describe the relationships of miRNAs affecting drug effects. The drug-miRNA interactome presented here can help understand how miRNAs affect drug effects and provide insights into the mechanisms of drug actions. In addition, with the information about drug-miRNA interactions, more effective drugs or combinatorial strategies can be designed in the future. 
The data used here can be accessed at http://mtd.comp-sysbio.org/.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wang:2019:LSF, author = "Yanbo Wang and Quan Liu and Shan Huang and Bo Yuan", title = "Learning a Structural and Functional Representation for Gene Expressions: To Systematically Dissect Complex Cancer Phenotypes", journal = j-TCBB, volume = "16", number = "5", pages = "1729--1742", month = sep, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2702161", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:03 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Cancer is a heterogeneous disease, thus one of the central problems is how to dissect the resulting complex phenotypes in terms of their biological building blocks. Computationally, this is to represent and interpret high dimensional observations through a structural and conceptual abstraction into the most influential determinants underlying the problem. The working hypothesis of this report is to consider gene interaction to be largely responsible for the manifestation of complex cancer phenotypes, thus where the representation is to be conceptualized. Here, we report a representation learning strategy combined with regularizations, in which gene expressions are described in terms of a regularized product of meta-genes and their expression levels. The meta-genes are constrained by gene interactions thus representing their original topological contexts. The expression levels are supervised by their conditional dependencies among the observations thus providing a cluster-specific constraint. We obtain both of these structural constraints using a node-based graphical model. 
Our representation allows the selection of more influential modules, thus implicating their possible roles in neoplastic transformations. We validate our representation strategy by its robust recognitions of various cancer phenotypes comparing with various classical methods. The modules discovered are either shared or specify for different types or stages of human cancers, all of which are consistent with literature and biology.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Wang:2019:NCA, author = "Yishu Wang and Huaying Fang and Dejie Yang and Hongyu Zhao and Minghua Deng", title = "Network Clustering Analysis Using Mixture Exponential-Family Random Graph Models and Its Application in Genetic Interaction Data", journal = j-TCBB, volume = "16", number = "5", pages = "1743--1752", month = sep, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2743711", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:03 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Motivation: Epistatic miniarray profile (EMAP) studies have enabled the mapping of large-scale genetic interaction networks and generated large amounts of data in model organisms. It provides an incredible set of molecular tools and advanced technologies that should be efficiently understanding the relationship between the genotypes and phenotypes of individuals. However, the network information gained from EMAP cannot be fully exploited using the traditional statistical network models. Because the genetic network is always heterogeneous, for example, the network structure features for one subset of nodes are different from those of the left nodes. 
Exponential-family random graph models (ERGMs) are a family of statistical models, which provide a principled and flexible way to describe the structural features (e.g., the density, centrality, and assortativity) of an observed network. However, the single ERGM is not enough to capture this heterogeneity of networks. In this paper, we consider a mixture ERGM (MixtureERGM) networks, which model a network with several communities, where each community is described by a single ERGM. Results: EM algorithm is a classical method to solve the mixture problem, however, it will be very slow when the data size is huge in the numerous applications. We adopt an efficient novel online graph clustering algorithm to classify the graph nodes and estimate the ERGM parameters for the MixtureERGM. In comparison studies, the MixtureERGM outperforms the role analysis for the network cluster in which the mixture of exponential-family random graph model is developed for many ego-network according to their roles. One genetic interaction network of yeast and two real social networks (provided as supplemental materials, which can be found on the Computer Society Digital Library at http://doi.ieeecomputersociety.org/10.1109/TCBB.2017.2743711) show the wide potential application of the MixtureERGM.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhao:2019:PPI, author = "Zhenni Zhao and Xinqi Gong", title = "Protein--Protein Interaction Interface Residue Pair Prediction Based on Deep Learning Architecture", journal = j-TCBB, volume = "16", number = "5", pages = "1753--1759", month = sep, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2706682", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:03 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Motivation: 
Proteins usually fulfill their biological functions by interacting with other proteins. Although some methods have been developed to predict the binding sites of a monomer protein, these are not sufficient for prediction of the interaction between two monomer proteins. The correct prediction of interface residue pairs from two monomer proteins is still an open question and has great significance for practical experimental applications in the life sciences. We hope to build a method for the prediction of interface residue pairs that is suitable for those applications. Results: Here, we developed a novel deep network architecture called the multi-layered Long-Short Term Memory networks (LSTMs) approach for the prediction of protein interface residue pairs. First, we created three new descriptions and used other six worked characterizations to describe an amino acid, then we employed these features to discriminate between interface residue pairs and non-interface residue pairs. Second, we used two thresholds to select residue pairs that are more likely to be interface residue pairs. Furthermore, this step increases the proportion of interface residue pairs and reduces the influence of imbalanced data. Third, we built deep network architectures based on Long-Short Term Memory networks algorithm to organize and refine the prediction of interface residue pairs by employing features mentioned above. We trained the deep networks on dimers in the unbound state in the international Protein-protein Docking Benchmark version 3.0. The updated data sets in the versions 4.0 and 5.0 were used as the validation set and test set respectively. 
For our best model, the accuracy rate was over 62 percent when we chose the top 0.2 percent pairs of every dimer in the test set as predictions, which will be very helpful for the understanding of protein-protein interaction mechanisms and for guidance in biological experiments.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Yang:2019:RHD, author = "Xi Yang and Guoqiang Han and Hongmin Cai and Yan Song", title = "Recovering Hidden Diagonal Structures via Non-Negative Matrix Factorization with Multiple Constraints", journal = j-TCBB, volume = "16", number = "5", pages = "1760--1772", month = sep, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2690282", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Nov 29 16:39:03 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Revealing data with intrinsically diagonal block structures is particularly useful for analyzing groups of highly correlated variables. Earlier researches based on non-negative matrix factorization (NMF) have been shown to be effective in representing such data by decomposing the observed data into two factors, where one factor is considered to be the feature and the other the expansion loading from a linear algebra perspective. If the data are sampled from multiple independent subspaces, the loading factor would possess a diagonal structure under an ideal matrix decomposition. However, the standard NMF method and its variants have not been reported to exploit this type of data via direct estimation. To address this issue, a non-negative matrix factorization with multiple constraints model is proposed in this paper. The constraints include an sparsity norm on the feature matrix and a total variational norm on each column of the loading matrix. 
The proposed model is shown to be capable of efficiently recovering diagonal block structures hidden in observed samples. An efficient numerical algorithm using the alternating direction method of multipliers model is proposed for optimizing the new model. Compared with several benchmark models, the proposed method performs robustly and effectively for simulated and real biological data.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Halder:2019:LII, author = "Anup Kumar Halder and Piyali Chatterjee and Mita Nasipuri and Dariusz Plewczynski and Subhadip Basu", title = "{3gClust}: Human Protein Cluster Analysis", journal = j-TCBB, volume = "16", number = "6", pages = "1773--1784", month = nov, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2840996", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:47 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2840996", abstract = "We present a human protein cluster analysis by combining: (1) $n$-gram based amino acid frequency features, (2) optimal feature selection, (3) hierarchical clustering, and (4) advanced partitioning techniques. 
Our method qualitatively and \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Majumder:2019:CMD, author = "Aurpan Majumder and Mrityunjay Sarkar and Prolay Sharma", title = "A Composite Mode Differential Gene Regulatory Architecture based on Temporal Expression Profiles", journal = j-TCBB, volume = "16", number = "6", pages = "1785--1793", month = nov, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2828418", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:47 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2828418", abstract = "Exploring the complex interactive mechanism in a Gene Regulatory Network (GRN) developed using transcriptome data obtained from standard microarray and/or RNA-seq experiments helps us to understand the triggering factors in cancer research. 
The \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Feng:2019:DLA, author = "Yujie Feng and Fan Yang and Xichuan Zhou and Yanli Guo and Fang Tang and Fengbo Ren and Jishun Guo and Shuiwang Ji", title = "A Deep Learning Approach for Targeted Contrast-Enhanced Ultrasound Based Prostate Cancer Detection", journal = j-TCBB, volume = "16", number = "6", pages = "1794--1801", month = nov, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2835444", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:47 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2835444", abstract = "The important role of angiogenesis in cancer development has driven many researchers to investigate the prospects of noninvasive cancer diagnosis based on the technology of contrast-enhanced ultrasound (CEUS) imaging. 
This paper presents a deep learning \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Brankovic:2019:DFS, author = "Aida Brankovic and Marjan Hosseini and Luigi Piroddi", title = "A Distributed Feature Selection Algorithm Based on Distance Correlation with an Application to Microarrays", journal = j-TCBB, volume = "16", number = "6", pages = "1802--1815", month = nov, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2833482", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:47 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2833482", abstract = "DNA microarray datasets are characterized by a large number of features with very few samples, which is a typical cause of overfitting and poor generalization in the classification task. 
Here, we introduce a novel feature selection (FS) approach which \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2019:MLP, author = "Li Zhang and Ho-Chun Wu and Cheuk-Hei Ho and Shing-Chow Chan", title = "A Multi-{Laplacian} Prior and Augmented {Lagrangian} Approach to the Exploratory Analysis of Time-Varying Gene and Transcriptional Regulatory Networks for Gene Microarray Data", journal = j-TCBB, volume = "16", number = "6", pages = "1816--1829", month = nov, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2828810", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:47 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2828810", abstract = "This paper proposes a novel multi-Laplacian prior (MLP) and augmented Lagrangian method (ALM) approach for gene interactions and putative transcription factors (TFs) identification from time-course gene microarray data. It employs a non-linear time-. \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Malhotra:2019:PTE, author = "Anvita Gupta Malhotra and Sudha Singh and Mohit Jha and Khushhali M. 
Pandey", title = "A Parametric Targetability Evaluation Approach for Vitiligo Proteome Extracted through Integration of Gene Ontologies and Protein Interaction Topologies", journal = j-TCBB, volume = "16", number = "6", pages = "1830--1842", month = nov, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2835459", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:47 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2835459", abstract = "Vitiligo is a well-known skin disorder with complex etiology. Vitiligo pathogenesis is multifaceted with many ramifications. A computational systemic path was designed to first propose candidate disease proteins by merging properties from protein \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Timonen:2019:PFM, author = "Juho Timonen and Henrik Mannerstr{\"o}m and Harri L{\"a}hdesm{\"a}ki and Jukka Intosalmi", title = "A Probabilistic Framework for Molecular Network Structure Inference by Means of Mechanistic Modeling", journal = j-TCBB, volume = "16", number = "6", pages = "1843--1854", month = nov, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2825327", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:47 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2825327", abstract = "Ordinary differential equations (ODEs) provide a powerful formalism to model molecular networks mechanistically. 
However, inferring the model structure, given a set of time course measurements and a large number of alternative molecular mechanisms, is a \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ma:2019:IFP, author = "Xiaoke Ma and Penggang Sun and Zhong-Yuan Zhang", title = "An Integrative Framework for Protein Interaction Network and Methylation Data to Discover Epigenetic Modules", journal = j-TCBB, volume = "16", number = "6", pages = "1855--1866", month = nov, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2831666", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:47 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2831666", abstract = "DNA methylation is a critical epigenetic modification that plays an important role in cancers. The available algorithms fail to fully characterize epigenetic modules. To address this issue, we first characterize the epigenetic module as a group of well-. 
\ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Caudai:2019:LSC, author = "Claudia Caudai and Emanuele Salerno and Monica Zopp{\`e} and Ivan Merelli and Anna Tonazzini", title = "{ChromStruct 4}: a {Python} Code to Estimate the Chromatin Structure from {Hi-C} Data", journal = j-TCBB, volume = "16", number = "6", pages = "1867--1878", month = nov, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2838669", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:47 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/python.bib; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2838669", abstract = "A method and a stand-alone Python code to estimate the 3D chromatin structure from chromosome conformation capture data are presented. The method is based on a multiresolution, modified-bead-chain chromatin model, evolved through quaternion operators in a \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhou:2019:CCK, author = "Huiwei Zhou and Yunlong Yang and Shixian Ning and Zhuang Liu and Chengkun Lang and Yingyu Lin and Degen Huang", title = "Combining Context and Knowledge Representations for Chemical-Disease Relation Extraction", journal = j-TCBB, volume = "16", number = "6", pages = "1879--1889", month = nov, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2838661", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:47 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2838661", abstract = "Automatically extracting the relationships between 
chemicals and diseases is significantly important to various areas of biomedical research and health care. Biomedical experts have built many large-scale knowledge bases (KBs) to advance the development \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Luo:2019:CDR, author = "Huimin Luo and Jianxin Wang and Min Li and Junwei Luo and Peng Ni and Kaijie Zhao and Fang-Xiang Wu and Yi Pan", title = "Computational Drug Repositioning with Random Walk on a Heterogeneous Network", journal = j-TCBB, volume = "16", number = "6", pages = "1890--1900", month = nov, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2832078", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:47 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2832078", abstract = "Drug repositioning is an efficient and promising strategy to identify new indications for existing drugs, which can improve the productivity of traditional drug discovery and development. 
Rapid advances in high-throughput technologies have generated \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Stella:2019:DDB, author = "Sabrina Stella and Roberto Chignola and Edoardo Milotti", title = "Dynamical Detection of Boundaries and Cavities in Biophysical Cell-Based Simulations of Growing Tumor Tissues", journal = j-TCBB, volume = "16", number = "6", pages = "1901--1911", month = nov, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2827374", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:47 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2827374", abstract = "Cell-based lattice-free simulations of the growth of tumor tissues require the definition of geometrical and topological relations among cells and the other basic elements of the simulation (most notably the local and the global environments). 
This is \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Cai:2019:EAF, author = "Xingyu Cai and Abdullah-Al Mamun and Sanguthevar Rajasekaran", title = "Efficient Algorithms for Finding the Closest $ \ell $-Mers in Biological Data", journal = j-TCBB, volume = "16", number = "6", pages = "1912--1921", month = nov, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2843364", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:47 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2843364", abstract = "With the advances in the next generation sequencing technology, huge amounts of data have been and get generated in biology. A bottleneck in dealing with such datasets lies in developing effective algorithms for extracting useful information from them. \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Hu:2019:EDP, author = "Lun Hu and Xiaohui Yuan and Xing Liu and Shengwu Xiong and Xin Luo", title = "Efficiently Detecting Protein Complexes from Protein Interaction Networks via Alternating Direction Method of Multipliers", journal = j-TCBB, volume = "16", number = "6", pages = "1922--1935", month = nov, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2844256", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:47 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2844256", abstract = "Protein complexes are crucial in improving our understanding of the mechanisms employed by proteins. 
Various computational algorithms have thus been proposed to detect protein complexes from protein interaction networks. However, given massive protein \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Dehghani:2019:EPA, author = "Toktam Dehghani and Mahmoud Naghibzadeh and Javad Sadri", title = "Enhancement of Protein $ \beta $-Sheet Topology Prediction Using Maximum Weight Disjoint Path Cover", journal = j-TCBB, volume = "16", number = "6", pages = "1936--1947", month = nov, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2837753", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:47 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2837753", abstract = "Predicting $ \beta $-sheet topology ($ \beta $-topology) is one of the most critical intermediate steps towards protein structure and function prediction. 
The $ \beta $-topology prediction problem is defined as the determination of the optimal arrangement of \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Jiang:2019:FNN, author = "Xue Jiang and Han Zhang and Zhao Zhang and Xiongwen Quan", title = "Flexible Non-Negative Matrix Factorization to Unravel Disease-Related Genes", journal = j-TCBB, volume = "16", number = "6", pages = "1948--1957", month = nov, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2823746", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:47 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2823746", abstract = "Recently, non-negative matrix factorization (NMF) has been shown to perform well in the analysis of omics data. NMF assumes that the expression level of one gene is a linear additive composition of metagenes. The elements in metagene matrix represent the \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Milano:2019:GNA, author = "Marianna Milano and Pietro Hiram Guzzi and Mario Cannataro", title = "{GLAlign}: a Novel Algorithm for Local Network Alignment", journal = j-TCBB, volume = "16", number = "6", pages = "1958--1969", month = nov, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2830323", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:47 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2830323", abstract = "Networks are successfully used as a modelling framework in many application domains. 
For instance, Protein-Protein Interaction Networks (PPINs) model the set of interactions among proteins in a cell. A critical application of network analysis is the \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Paul:2019:IDE, author = "Amit Paul and Jaya Sil", title = "Identification of Differentially Expressed Genes to Establish New Biomarker for Cancer Prediction", journal = j-TCBB, volume = "16", number = "6", pages = "1970--1985", month = nov, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2837095", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:47 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2837095", abstract = "The goal of the human genome project is to integrate genetic information into different clinical therapies. To achieve this goal, different computational algorithms are devised for identifying the biomarker genes, cause of complex diseases. However, most \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Hao:2019:ICG, author = "Xiaoke Hao and Xiaohui Yao and Shannon L. Risacher and Andrew J. 
Saykin and Jintai Yu and Huifu Wang and Lan Tan and Li Shen and Daoqiang Zhang", title = "Identifying Candidate Genetic Associations with {MRI}-Derived {AD}-Related {ROI} via Tree-Guided Sparse Learning", journal = j-TCBB, volume = "16", number = "6", pages = "1986--1996", month = nov, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2833487", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:47 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2833487", abstract = "Imaging genetics has attracted significant interests in recent studies. Traditional work has focused on mass-univariate statistical approaches that identify important single nucleotide polymorphisms (SNPs) associated with quantitative traits (QTs) of \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Fan:2019:ILS, author = "Anjing Fan and Haitao Wang and Hua Xiang and Xiufen Zou", title = "Inferring Large-Scale Gene Regulatory Networks Using a Randomized Algorithm Based on Singular Value Decomposition", journal = j-TCBB, volume = "16", number = "6", pages = "1997--2008", month = nov, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2825446", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:47 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2825446", abstract = "Reconstructing large-scale gene regulatory networks (GRNs) is a challenging problem in the field of computational biology. Various methods for inferring GRNs have been developed, but they fail to accurately infer GRNs with a large number of genes. 
\ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Chaudhury:2019:MVS, author = "Ayan Chaudhury and Christopher Ward and Ali Talasaz and Alexander G. Ivanov and Mark Brophy and Bernard Grodzinski and Norman P. A. H{\"u}ner and Rajnikant V. Patel and John L. Barron", title = "Machine Vision System for {$3$D} Plant Phenotyping", journal = j-TCBB, volume = "16", number = "6", pages = "2009--2022", month = nov, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2824814", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:47 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2824814", abstract = "Machine vision for plant {\em phenotyping\/} is an emerging research area for producing high throughput in agriculture and crop science applications. Since 2D based approaches have their inherent limitations, 3D plant analysis is becoming state \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Nalbantoglu:2019:MAM, author = "O. 
Ufuk Nalbantoglu and Khalid Sayood", title = "{MIMOSA}: Algorithms for Microbial Profiling", journal = j-TCBB, volume = "16", number = "6", pages = "2023--2034", month = nov, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2830324", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:47 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2830324", abstract = "A significant goal of the study of metagenomes obtained from an environment is to find the microbial diversity and the abundance of each organism in the community. Phylotyping and binning methods which address this problem generally operate using either \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Tsompanas:2019:MMF, author = "Michail-Antisthenis Tsompanas and Andrew Adamatzky and Ioannis Ieropoulos and Neil William Phillips and Georgios Ch. 
Sirakoulis and John Greenman", title = "Modelling Microbial Fuel Cells Using Lattice {Boltzmann} Methods", journal = j-TCBB, volume = "16", number = "6", pages = "2035--2045", month = nov, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2831223", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:47 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2831223", abstract = "An accurate modelling of bio-electrochemical processes that govern Microbial Fuel Cells (MFCs) and mapping their behavior according to several parameters will enhance the development of MFC technology and enable their successful implementation in well \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yang:2019:PFI, author = "Runtao Yang and Chengjin Zhang and Rui Gao and Lina Zhang and Qing Song", title = "Predicting {FAD} Interacting Residues with Feature Selection and Comprehensive Sequence Descriptors", journal = j-TCBB, volume = "16", number = "6", pages = "2046--2056", month = nov, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2824332", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:47 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2824332", abstract = "The function of a flavoprotein is determined to a great extent by the binding sites on its surface that interacts with flavin adenine dinucleotide (FAD). Malfunction or dysregulation of FAD binding leads to a series of diseases. 
Therefore, accurately \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{K:2019:PDL, author = "MD Aksam V. K. and V. M. Chandrasekaran and Sundaramurthy Pandurangan", title = "Protein Domain Level Cancer Drug Targets in the Network of {MAPK} Pathways", journal = j-TCBB, volume = "16", number = "6", pages = "2057--2065", month = nov, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2829507", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:47 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2829507", abstract = "Proteins in the MAPK pathways considered as potential drug targets for cancer treatment. Pathways along with the cross-talks increase their scope to view them as a network of MAPK pathways. Side effect causing targeted domains act as a proxy for drug \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Biswas:2019:RIM, author = "Ashis Kumer Biswas and Dong-Chul Kim and Mingon Kang and Jean X. 
Gao", title = "Robust Inductive Matrix Completion Strategy to Explore Associations Between {LincRNAs} and Human Disease Phenotypes", journal = j-TCBB, volume = "16", number = "6", pages = "2066--2077", month = nov, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2844816", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:47 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2844816", abstract = "Over the past few years, it has been established that a number of long intergenic non-coding RNAs (lincRNAs) are linked to a wide variety of human diseases. The relationship among many other lincRNAs still remains as puzzle. Validation of such link \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wingfield:2019:RMM, author = "Benjamin Wingfield and Sonya Coleman and T. M. McGinnity and A. J. Bjourson", title = "Robust Microbial Markers for Non-Invasive Inflammatory Bowel Disease Identification", journal = j-TCBB, volume = "16", number = "6", pages = "2078--2088", month = nov, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2831212", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:47 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2831212", abstract = "Inflammatory Bowel Disease (IBD) is an umbrella term for a group of inflammatory diseases of the gastrointestinal tract, including Crohn's Disease and ulcerative colitis. 
Changes to the intestinal microbiome, the community of micro-organisms that \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Sevakula:2019:TLM, author = "Rahul K. Sevakula and Vikas Singh and Nishchal K. Verma and Chandan Kumar and Yan Cui", title = "Transfer Learning for Molecular Cancer Classification Using Deep Neural Networks", journal = j-TCBB, volume = "16", number = "6", pages = "2089--2100", month = nov, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2822803", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:47 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2822803", abstract = "The emergence of deep learning has impacted numerous machine learning based applications and research. The reason for its success lies in two main advantages: (1) it provides the ability to learn very complex non-linear relationships between features and (2). \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Martin-Vide:2020:ACB, author = "Carlos Mart{\'\i}n-Vide and Miguel A. 
Vega-Rodr{\'\i}guez", title = "Algorithms for Computational Biology: Fifth Edition", journal = j-TCBB, volume = "17", number = "1", pages = "1--1", month = jan, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2949851", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:48 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2019.2949851", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Brito:2020:HRT, author = "Klairton Lima Brito and Andre Rodrigues Oliveira and Ulisses Dias and Zanoni Dias", title = "Heuristics for the Reversal and Transposition Distance Problem", journal = j-TCBB, volume = "17", number = "1", pages = "2--13", month = jan, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2945759", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:48 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2019.2945759", abstract = "We present three heuristics --- {\em Sliding Window}, {\em Look Ahead}, and {\em Iterative Sliding Window} --- to improve solutions for the Sorting Signed Permutations by Reversals and Transpositions Problem. 
We investigate the \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{vanIersel:2020:PTA, author = "Leo van Iersel and Remie Janssen and Mark Jones and Yukihiro Murakami and Norbert Zeh", title = "Polynomial-Time Algorithms for Phylogenetic Inference Problems Involving Duplication and Reticulation", journal = j-TCBB, volume = "17", number = "1", pages = "14--26", month = jan, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2934957", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:48 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2019.2934957", abstract = "A common problem in phylogenetics is to try to infer a species phylogeny from gene trees. We consider different variants of this problem. The first variant, called {\sc Unrestricted Minimal Episodes Inference}, aims at inferring a species tree based on \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Miyawaki-Kuwakado:2020:PMC, author = "Atsuko Miyawaki-Kuwakado and Soichiro Komori and Fumihide Shiraishi", title = "A Promising Method for Calculating True Steady-State Metabolite Concentrations in Large-Scale Metabolic Reaction Network Models", journal = j-TCBB, volume = "17", number = "1", pages = "27--36", month = jan, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2853724", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:48 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2853724", abstract = "The calculation of steady-state metabolite concentrations in metabolic 
reaction network models is the first step in the sensitivity analysis of a metabolic reaction system described by differential equations. However, this calculation becomes very \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Maji:2020:SEA, author = "Ranjan Kumar Maji and Sunirmal Khatua and Zhumur Ghosh", title = "A Supervised Ensemble Approach for Sensitive {microRNA} Target Prediction", journal = j-TCBB, volume = "17", number = "1", pages = "37--46", month = jan, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2858252", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:48 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2858252", abstract = "MicroRNAs, a class of small non-coding RNAs, regulate important biological functions via post-transcriptional regulation of messenger RNAs (mRNAs). 
Despite rapid development in miRNA research, precise experimental methods to determine miRNA target \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Kocak:2020:ABP, author = "Mehmet Kocak and Khyobeni Mozhui", title = "An Application of the {Bayesian} Periodicity Test to Identify Diurnal Rhythm Genes in the Brain", journal = j-TCBB, volume = "17", number = "1", pages = "47--55", month = jan, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2859971", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:48 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2859971", abstract = "Biological systems are extremely dynamic and many aspects of cellular processes show rhythmic circadian patterns. Extracting such information from large expression data is challenging. 
In this work, we present a modified application of the Empirical Bayes \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ahmed:2020:AEC, author = "Syed Sazzad Ahmed and Swarup Roy and Jugal Kalita", title = "Assessing the Effectiveness of Causality Inference Methods for Gene Regulatory Networks", journal = j-TCBB, volume = "17", number = "1", pages = "56--70", month = jan, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2853728", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:48 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2853728", abstract = "Causality inference is the use of computational techniques to predict possible causal relationships for a set of variables, thereby forming a directed network. Causality inference in Gene Regulatory Networks (GRNs) is an important, yet challenging task \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yang:2020:CBM, author = "Cheng-Hong Yang and Yu-Da Lin and Li-Yeh Chuang", title = "Class Balanced Multifactor Dimensionality Reduction to Detect Gene--Gene Interactions", journal = j-TCBB, volume = "17", number = "1", pages = "71--81", month = jan, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2858776", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:48 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2858776", abstract = "Detecting gene--gene interactions in single-nucleotide polymorphism data is vital for understanding disease susceptibility. 
However, existing approaches may be limited by the sample size in case--control studies. Herein, we propose a balance \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{weiTan:2020:DLP, author = "Jing Wei Tan and Siow-Wee Chang and Sameem Abdul-Kareem and Hwa Jen Yap and Kien-Thai Yong", title = "Deep Learning for Plant Species Classification Using Leaf Vein Morphometric", journal = j-TCBB, volume = "17", number = "1", pages = "82--90", month = jan, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2848653", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:48 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2848653", abstract = "An automated plant species identification system could help botanists and layman in identifying plant species rapidly. Deep learning is robust for feature extraction as it is superior in providing deeper information of images. In this research, a new CNN-. 
\ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Feng:2020:DMP, author = "Yangqin Feng and Lei Zhang and Juan Mo", title = "Deep Manifold Preserving Autoencoder for Classifying Breast Cancer Histopathological Images", journal = j-TCBB, volume = "17", number = "1", pages = "91--101", month = jan, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2858763", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:48 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2858763", abstract = "Classifying breast cancer histopathological images automatically is an important task in computer assisted pathology analysis. However, extracting informative and non-redundant features for histopathological image classification is challenging due to the \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Habibi:2020:DPC, author = "Mahnaz Habibi and Pegah Khosravi", title = "Disruption of Protein Complexes from Weighted Complex Networks", journal = j-TCBB, volume = "17", number = "1", pages = "102--109", month = jan, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2859952", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:48 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2859952", abstract = "Essential proteins are indispensable units for living organisms. Removing those leads to disruption of protein complexes and causing lethality. Recently, theoretical methods have been presented to detect essential proteins in protein interaction network. 
\ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{He:2020:DSJ, author = "Yicheng He and Junfeng Liu and Xia Ning", title = "Drug Selection via Joint Push and Learning to Rank", journal = j-TCBB, volume = "17", number = "1", pages = "110--123", month = jan, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2848908", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:48 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2848908", abstract = "Selecting the right drugs for the right patients is a primary goal of precision medicine. In this article, we consider the problem of cancer drug selection in a learning-to-rank framework. We have formulated the cancer drug selection problem as to \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhou:2020:EPD, author = "Jiyun Zhou and Qin Lu and Ruifeng Xu and Lin Gui and Hongpeng Wang", title = "{EL\_LSTM}: Prediction of {DNA}-Binding Residue from Protein Sequence by Combining Long Short-Term Memory and Ensemble Learning", journal = j-TCBB, volume = "17", number = "1", pages = "124--135", month = jan, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2858806", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:48 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2858806", abstract = "Most past works for DNA-binding residue prediction did not consider the relationships between residues. 
In this paper, we propose a novel approach for DNA-binding residue prediction, referred to as EL\_LSTM, which includes two main components. The first \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Pan:2020:FBG, author = "Tony Pan and Rahul Nihalani and Srinivas Aluru", title = "Fast {de Bruijn} Graph Compaction in Distributed Memory Environments", journal = j-TCBB, volume = "17", number = "1", pages = "136--148", month = jan, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2858797", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:48 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2858797", abstract = "De Bruijn graph based genome assembly has gained popularity as short read sequencers become ubiquitous. A core assembly operation is the generation of unitigs, which are sequences corresponding to chains in the graph. 
Unitigs are used as building blocks \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Marczyk:2020:GXA, author = "Michal Marczyk and Roman Jaksik and Andrzej Polanski and Joanna Polanska", title = "{GaMRed}-Adaptive Filtering of High-Throughput Biological Data", journal = j-TCBB, volume = "17", number = "1", pages = "149--157", month = jan, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2858825", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:48 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2858825", abstract = "Data filtering based on removing non-informative features, with unchanged signal between compared experimental conditions, can significantly increase sensitivity of methods used to detect differentially expressed genes or other molecular components \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Pons:2020:GLL, author = "Joan Carles Pons and Celine Scornavacca and Gabriel Cardona", title = "Generation of Level-$k$ {LGT} Networks", journal = j-TCBB, volume = "17", number = "1", pages = "158--164", month = jan, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2895344", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:48 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2019.2895344", abstract = "Phylogenetic networks provide a mathematical model to represent the evolution of a set of species where, apart from speciation, reticulate evolutionary events have to be taken into account. 
Among these events, lateral gene transfers need special \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Cai:2020:IMM, author = "Jiulun Cai and Hongmin Cai and Jiazhou Chen and Xi Yang", title = "Identifying ``Many-to-Many'' Relationships between Gene-Expression Data and Drug-Response Data via Sparse Binary Matching", journal = j-TCBB, volume = "17", number = "1", pages = "165--176", month = jan, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2849708", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:48 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2849708", abstract = "Identifying gene-drug patterns is a critical step in pharmacology for unveiling disease mechanisms and drug discovery. The availability of high-throughput technologies accumulates massive large-scale pharmacological and genomic data, and thus provides a \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Liao:2020:ILI, author = "Xingyu Liao and Min Li and Junwei Luo and You Zou and Fang-Xiang Wu and Yi Pan and Feng Luo and Jianxin Wang", title = "Improving {\em de novo\/} Assembly Based on Read Classification", journal = j-TCBB, volume = "17", number = "1", pages = "177--188", month = jan, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2861380", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:48 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2861380", abstract = "Due to sequencing bias, sequencing error, and repeat problems, the genome 
assemblies usually contain misarrangements and gaps. When tackling these problems, current assemblers commonly consider the read libraries as a whole and adopt the same strategy to \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2020:LPL, author = "Tianyi Zhang and Minghui Wang and Jianing Xi and Ao Li", title = "{LPGNMF}: Predicting Long Non-Coding {RNA} and Protein Interaction Using Graph Regularized Nonnegative Matrix Factorization", journal = j-TCBB, volume = "17", number = "1", pages = "189--197", month = jan, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2861009", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:48 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2861009", abstract = "Long non-coding RNAs (lncRNA) play crucial roles in a variety of biological processes and complex diseases. Massive studies have indicated that lncRNAs interact with related proteins to exert regulation of cellular biological processes. Because it is time-. \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Carvajal-Lopez:2020:MBQ, author = "Patricia Carvajal-L{\'o}pez and Fernando D. 
{Von Borstel} and Amada Torres and Gabriella Rustici and Joaqu{\'\i}n Guti{\'e}rrez and Eduardo Romero-Vivas", title = "Microarray-Based Quality Assessment as a Supporting Criterion for {\em de novo\/} Transcriptome Assembly Selection", journal = j-TCBB, volume = "17", number = "1", pages = "198--206", month = jan, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2860997", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:48 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2860997", abstract = "RNA-Sequencing and {\em de novo\/} assembly have enabled the analysis of species with non-available reference transcriptomes, although intrinsic features (biological and technical) induce errors in the reconstruction. A strategy to resolve these \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Acharya:2020:MFG, author = "Sudipta Acharya and Sriparna Saha and Prasanna Pradhan", title = "Multi-Factored Gene-Gene Proximity Measures Exploiting Biological Knowledge Extracted from Gene Ontology: Application in Gene Clustering", journal = j-TCBB, volume = "17", number = "1", pages = "207--219", month = jan, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2849362", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:48 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2849362", abstract = "To describe the cellular functions of proteins and genes, a potential dynamic vocabulary is Gene Ontology (GO), which comprises of three sub-ontologies namely, Biological-process, Cellular-component, and Molecular-function. 
It has several applications in \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Li:2020:MMT, author = "Tao Li and Xiankai Zhang and Feng Luo and Fang-Xiang Wu and Jianxin Wang", title = "{MultiMotifMaker}: a Multi-Thread Tool for Identifying {DNA} Methylation Motifs from {Pacbio} Reads", journal = j-TCBB, volume = "17", number = "1", pages = "220--225", month = jan, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2861399", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:48 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2861399", abstract = "The methylation of DNA is an important mechanism to control biological processes. Recently, the Pacbio SMRT technology provides a new way to identify base methylation in the genome. 
MotifMaker is a tool developed by Pacbio for discovering DNA methylation \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Li:2020:NIM, author = "Xiangtao Li and Shixiong Zhang and Ka-Chun Wong", title = "Nature-Inspired Multiobjective Epistasis Elucidation from Genome-Wide Association Studies", journal = j-TCBB, volume = "17", number = "1", pages = "226--237", month = jan, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2849759", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:48 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2849759", abstract = "In recent years, the detection of epistatic interactions of multiple genetic variants on the causes of complex diseases brings a significant challenge in genome-wide association studies (GWAS). 
However, most of the existing methods still suffer from \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yu:2020:NGF, author = "Guoxian Yu and Keyao Wang and Guangyuan Fu and Maozu Guo and Jun Wang", title = "{NMFGO}: Gene Function Prediction via Nonnegative Matrix Factorization with Gene Ontology", journal = j-TCBB, volume = "17", number = "1", pages = "238--249", month = jan, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2861379", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:48 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2861379", abstract = "Gene Ontology (GO) is a controlled vocabulary of terms that describe molecule function, biological roles, and cellular locations of gene products (i.e., proteins and RNAs), it hierarchically organizes more than 43,000 GO terms via the direct acyclic \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{pour:2020:OBF, author = "Ali {Foroughi pour} and Lori A. 
Dalton", title = "Optimal {Bayesian} Filtering for Biomarker Discovery: Performance and Robustness", journal = j-TCBB, volume = "17", number = "1", pages = "250--263", month = jan, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2858814", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:48 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2858814", abstract = "Optimal Bayesian feature filtering (OBF) is a fast and memory-efficient algorithm that optimally identifies markers with distributional differences between treatment groups under Gaussian models. Here, we study the performance and robustness of OBF for \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Mazrouee:2020:PMF, author = "Sepideh Mazrouee and Wei Wang", title = "{PolyCluster}: Minimum Fragment Disagreement Clustering for Polyploid Phasing", journal = j-TCBB, volume = "17", number = "1", pages = "264--277", month = jan, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2858803", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:48 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2858803", abstract = "Phasing is an emerging area in computational biology with important applications in clinical decision making and biomedical sciences. 
While machine learning techniques have shown tremendous potential in many biomedical applications, their utility in \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Pyne:2020:RRT, author = "Saptarshi Pyne and Alok Ranjan Kumar and Ashish Anand", title = "Rapid Reconstruction of Time-Varying Gene Regulatory Networks", journal = j-TCBB, volume = "17", number = "1", pages = "278--291", month = jan, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2861698", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:48 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2861698", abstract = "Rapid advancements in high-throughput technologies have resulted in genome-scale time series datasets. Uncovering the temporal sequence of gene regulatory events, in the form of time-varying gene regulatory networks (GRNs), demands computationally fast, \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Delgado:2020:STR, author = "Ram{\'o}n A. Delgado and Zhiyong Chen and Richard H. 
Middleton", title = "Stepwise {Tikhonov} Regularisation: Application to the Prediction of {HIV-1} Drug Resistance", journal = j-TCBB, volume = "17", number = "1", pages = "292--301", month = jan, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2849369", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:48 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2849369", abstract = "This paper focuses on constructing genotypic predictors for antiretroviral drug susceptibility of HIV. To this end, a method to recover the largest elements of an unknown vector in a least squares problem is developed. The proposed method introduces two \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Alden:2020:UEE, author = "Kieran Alden and Jason Cosgrove and Mark Coles and Jon Timmis", title = "Using Emulation to Engineer and Understand Simulations of Biological Systems", journal = j-TCBB, volume = "17", number = "1", pages = "302--315", month = jan, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2843339", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Jun 10 07:29:48 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2843339", abstract = "Modeling and simulation techniques have demonstrated success in studying biological systems. 
As the drive to better capture biological complexity leads to more sophisticated simulators, it becomes challenging to perform statistical analyses that help \ldots{}",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "https://dl.acm.org/loi/tcbb",
}

@Article{Ganor:2020:NGT,
  author =       "Dor Ganor and Ron Y. Pinter and Meirav Zehavi",
  title =        "A Note on {GRegNetSim}: a Tool for the Discrete Simulation and Analysis of Genetic Regulatory Networks",
  journal =      j-TCBB,
  volume =       "17",
  number =       "1",
  pages =        "316--320",
  month =        jan,
  year =         "2020",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2018.2878749",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Jun 10 07:29:48 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2878749",
  abstract =     "Discrete simulations of genetic regulatory networks have been used to study subsystems of yeast successfully. Existing models underlying these simulations are based on specific transition functions, which determine the node states in the network.
However, \ldots{}",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "https://dl.acm.org/loi/tcbb",
}

@Article{Chen:2020:BPD,
  author =       "Ziwei Chen and Xiangqi Bai and Liang Ma and Xiawei Wang and Xiuqin Liu and Yuting Liu and Luonan Chen and Lin Wan",
  title =        "A Branch Point on Differentiation Trajectory is the Bifurcating Event Revealed by Dynamical Network Biomarker Analysis of Single-Cell Data",
  journal =      j-TCBB,
  volume =       "17",
  number =       "2",
  pages =        "366--375",
  month =        mar,
  year =         "2020",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2018.2847690",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Jun 10 07:29:49 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2847690",
  abstract =     "The advance in single-cell profiling technologies and the development in computational algorithms provide the opportunity to reconstruct pseudo temporal trajectory with branch point of cellular development.
On the other hand, theories such as dynamical \ldots{}",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "https://dl.acm.org/loi/tcbb",
}

@Article{Zhang:2020:CCM,
  author =       "Lihua Zhang and Shihua Zhang",
  title =        "Comparison of Computational Methods for Imputing Single-Cell {RNA}-Sequencing Data",
  journal =      j-TCBB,
  volume =       "17",
  number =       "2",
  pages =        "376--389",
  month =        mar,
  year =         "2020",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2018.2848633",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Jun 10 07:29:49 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2848633",
  abstract =     "Single-cell RNA-sequencing (scRNA-seq) is a recent breakthrough technology, which paves the way for measuring RNA levels at single cell resolution to study precise biological functions. One of the main challenges when analyzing scRNA-seq data is the \ldots{}",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "https://dl.acm.org/loi/tcbb",
}

@Article{Li:2020:DDM,
  author =       "Feng Li and Lin Gao and Bingbo Wang",
  title =        "Detection of Driver Modules with Rarely Mutated Genes in Cancers",
  journal =      j-TCBB,
  volume =       "17",
  number =       "2",
  pages =        "390--401",
  month =        mar,
  year =         "2020",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2018.2846262",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Jun 10 07:29:49 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2846262",
  abstract =     "Identifying driver modules or pathways is a key challenge to interpret the molecular mechanisms and pathogenesis underlying cancer.
An increasing number of studies suggest that rarely mutated genes are important for the development of cancer. However, the \ldots{}",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "https://dl.acm.org/loi/tcbb",
}

@Article{Jiang:2020:DSE,
  author =       "Hao Jiang and Yushan Qiu and Wenpin Hou and Xiaoqing Cheng and Man Yi Yim and Wai-Ki Ching",
  title =        "Drug Side-Effect Profiles Prediction: From Empirical to Structural Risk Minimization",
  journal =      j-TCBB,
  volume =       "17",
  number =       "2",
  pages =        "402--410",
  month =        mar,
  year =         "2020",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2018.2850884",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Jun 10 07:29:49 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2850884",
  abstract =     "The identification of drug side-effects is considered to be an important step in drug design, which could not only shorten the time but also reduce the cost of drug development. In this paper, we investigate the relationship between the potential side-
\ldots{}",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "https://dl.acm.org/loi/tcbb",
}

@Article{Feng:2020:EEL,
  author =       "Zhan-Ying Feng and Yong Wang",
  title =        "{ELF}: Extract Landmark Features By Optimizing Topology Maintenance, Redundancy, and Specificity",
  journal =      j-TCBB,
  volume =       "17",
  number =       "2",
  pages =        "411--421",
  month =        mar,
  year =         "2020",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2018.2846225",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Jun 10 07:29:49 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2846225",
  abstract =     "Feature selection is the process of selecting a subset of landmark features for model construction when there are many features and a comparatively few samples. The far-reaching development technologies such as biological sequencing at single cell level \ldots{}",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "https://dl.acm.org/loi/tcbb",
}

@Article{Xi:2020:HNM,
  author =       "Jianing Xi and Ao Li and Minghui Wang",
  title =        "{HetRCNA}: a Novel Method to Identify Recurrent Copy Number Alternations from Heterogeneous Tumor Samples Based on Matrix Decomposition Framework",
  journal =      j-TCBB,
  volume =       "17",
  number =       "2",
  pages =        "422--434",
  month =        mar,
  year =         "2020",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2018.2846599",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Jun 10 07:29:49 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2846599",
  abstract =     "A common strategy to discovering cancer associated copy number aberrations (CNAs) from a cohort of cancer samples is to detect recurrent CNAs (RCNAs).
Although the previous methods can successfully identify communal RCNAs shared by nearly all tumor \ldots{}",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "https://dl.acm.org/loi/tcbb",
}

@Article{Liu:2020:MDN,
  author =       "Ye Liu and Michael K. Ng and Stephen Wu",
  title =        "Multi-Domain Networks Association for Biological Data Using Block Signed Graph Clustering",
  journal =      j-TCBB,
  volume =       "17",
  number =       "2",
  pages =        "435--448",
  month =        mar,
  year =         "2020",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2018.2848904",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Jun 10 07:29:49 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2848904",
  abstract =     "Multi-domain biological network association and clustering have attracted a lot of attention in biological data integration and understanding, which can provide a more global and accurate understanding of biological phenomenon.
In many problems, different \ldots{}",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "https://dl.acm.org/loi/tcbb",
}

@Article{Shi:2020:QDD,
  author =       "Jifan Shi and Juan Zhao and Xiaoping Liu and Luonan Chen and Tiejun Li",
  title =        "Quantifying Direct Dependencies in Biological Networks by Multiscale Association Analysis",
  journal =      j-TCBB,
  volume =       "17",
  number =       "2",
  pages =        "449--458",
  month =        mar,
  year =         "2020",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2018.2846648",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Jun 10 07:29:49 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2846648",
  abstract =     "Partial correlation (PC) or conditional mutual information (CMI) is widely used in detecting direct dependencies between the observed variables in biological networks by eliminating indirect correlations/associations, but it fails whenever there are some \ldots{}",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "https://dl.acm.org/loi/tcbb",
}

@Article{Kapoor:2020:GMM,
  author =       "Rajan Kapoor and Aniruddha Datta and Chao Sima and Jianping Hua and Rosana Lopes and Michael L.
Bittner",
  title =        "A {Gaussian} Mixture-Model Exploiting Pathway Knowledge for Dissecting Cancer Heterogeneity",
  journal =      j-TCBB,
  volume =       "17",
  number =       "2",
  pages =        "459--468",
  month =        mar,
  year =         "2020",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2018.2869813",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Jun 10 07:29:49 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2869813",
  abstract =     "In this work, we develop a systematic approach for applying pathway knowledge to a multivariate Gaussian mixture model for dissecting a heterogeneous cancer tissue. The downstream transcription factors are selected as observables from available partial \ldots{}",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "https://dl.acm.org/loi/tcbb",
}

@Article{Lecca:2020:RBM,
  author =       "Paola Lecca and Angela Re",
  title =        "A Reaction-Based Model of the State Space of Chemical Reaction Systems Enables Efficient Simulations",
  journal =      j-TCBB,
  volume =       "17",
  number =       "2",
  pages =        "469--482",
  month =        mar,
  year =         "2020",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2019.2894699",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Jun 10 07:29:49 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1109/TCBB.2019.2894699",
  abstract =     "The choice of the state space representation of a system can turn into a prominent advantage or burden in any endeavour to mathematically model dynamical systems since it entails the analytical tractability of the related modelling formalism and the \ldots{}",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "https://dl.acm.org/loi/tcbb",
}

@Article{Riesen:2020:AGE,
  author =       "Kaspar
Riesen and Miquel Ferrer and Horst Bunke",
  title =        "Approximate Graph Edit Distance in Quadratic Time",
  journal =      j-TCBB,
  volume =       "17",
  number =       "2",
  pages =        "483--494",
  month =        mar,
  year =         "2020",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2015.2478463",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Jun 10 07:29:49 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1109/TCBB.2015.2478463",
  abstract =     "Graph edit distance is one of the most flexible and general graph matching models available. The major drawback of graph edit distance, however, is its computational complexity that restricts its applicability to graphs of rather small size. Recently, the \ldots{}",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "https://dl.acm.org/loi/tcbb",
}

@Article{Lei:2020:AFS,
  author =       "Xiujuan Lei and Xiaoqin Yang and Fang-Xiang Wu",
  title =        "Artificial Fish Swarm Optimization Based Method to Identify Essential Proteins",
  journal =      j-TCBB,
  volume =       "17",
  number =       "2",
  pages =        "495--505",
  month =        mar,
  year =         "2020",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2018.2865567",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Jun 10 07:29:49 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2865567",
  abstract =     "It is well known that essential proteins play an extremely important role in controlling cellular activities in living organisms.
Identifying essential proteins from protein--protein interaction (PPI) networks is conducive to the understanding of cellular \ldots{}",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "https://dl.acm.org/loi/tcbb",
}

@Article{Sen:2020:AAS,
  author =       "Rishika Sen and Somnath Tagore and Rajat K. De",
  title =        "{ASAPP}: Architectural Similarity-Based Automated Pathway Prediction System and Its Application in Host-Pathogen Interactions",
  journal =      j-TCBB,
  volume =       "17",
  number =       "2",
  pages =        "506--515",
  month =        mar,
  year =         "2020",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2018.2872527",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Jun 10 07:29:49 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2872527",
  abstract =     "The significance of metabolic pathway prediction is to envision the viable unknown transformations that can occur provided the appropriate enzymes are present. It can facilitate the prediction of the consequences of host-pathogen interactions. In this \ldots{}",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "https://dl.acm.org/loi/tcbb",
}

@Article{Chen:2020:BDF,
  author =       "Haifen Chen and D. A. K. Maduranga and Piyushkumar A.
Mundra and Jie Zheng",
  title =        "{Bayesian} Data Fusion of Gene Expression and Histone Modification Profiles for Inference of Gene Regulatory Network",
  journal =      j-TCBB,
  volume =       "17",
  number =       "2",
  pages =        "516--525",
  month =        mar,
  year =         "2020",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2018.2869590",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Jun 10 07:29:49 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2869590",
  abstract =     "Accurately reconstructing gene regulatory networks (GRNs) from high-throughput gene expression data has been a major challenge in systems biology for decades. Many approaches have been proposed to solve this problem. However, there is still much room for \ldots{}",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "https://dl.acm.org/loi/tcbb",
}

@Article{Yazdani:2020:BFP,
  author =       "Hossein Yazdani and Leo L. Cheng and David C. Christiani and Azam Yazdani",
  title =        "Bounded Fuzzy Possibilistic Method Reveals Information about Lung Cancer through Analysis of Metabolomics",
  journal =      j-TCBB,
  volume =       "17",
  number =       "2",
  pages =        "526--535",
  month =        mar,
  year =         "2020",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2018.2869757",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Jun 10 07:29:49 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2869757",
  abstract =     "Learning methods, such as conventional clustering and classification, have been applied in diagnosing diseases to categorize samples based on their features.
Going beyond clustering samples, membership degrees represent to what degree each sample belongs \ldots{}",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "https://dl.acm.org/loi/tcbb",
}

@Article{Li:2020:DNN,
  author =       "Yujie Li and Heng Huang and Hanbo Chen and Tianming Liu",
  title =        "Deep Neural Networks for {{\em In Situ}} Hybridization Grid Completion and Clustering",
  journal =      j-TCBB,
  volume =       "17",
  number =       "2",
  pages =        "536--546",
  month =        mar,
  year =         "2020",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2018.2864262",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Jun 10 07:29:49 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2864262",
  abstract =     "Transcriptome in brain plays a crucial role in understanding the cortical organization and the development of brain structure and function. Two challenges, incomplete data and high dimensionality of transcriptome, remain unsolved.
Here, we present a novel \ldots{}",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "https://dl.acm.org/loi/tcbb",
}

@Article{Arabameri:2020:DCC,
  author =       "Abazar Arabameri and Davud Asemani and Pegah Teymourpour",
  title =        "Detection of Colorectal Carcinoma Based on Microbiota Analysis Using Generalized Regression Neural Networks and Nonlinear Feature Selection",
  journal =      j-TCBB,
  volume =       "17",
  number =       "2",
  pages =        "547--557",
  month =        mar,
  year =         "2020",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2018.2870124",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Jun 10 07:29:49 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2870124",
  abstract =     "To obtain a screening tool for colorectal cancer (CRC) based on gut microbiota, we seek here to identify an optimal classifier for CRC detection as well as a novel nonlinear feature selection method for determining the most discriminative microbial \ldots{}",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "https://dl.acm.org/loi/tcbb",
}

@Article{Yang:2020:DDF,
  author =       "Shengping Yang and Mitchell S.
Wachtel and Jiangrong Wu",
  title =        "{DFseq}: Distribution-Free Method to Detect Differential Gene Expression for {RNA}-Sequencing Data",
  journal =      j-TCBB,
  volume =       "17",
  number =       "2",
  pages =        "558--565",
  month =        mar,
  year =         "2020",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2018.2866994",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Jun 10 07:29:49 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2866994",
  abstract =     "Many current RNA-sequencing data analysis methods compare expressions one gene at a time, taking little consideration of the correlations among genes. In this study, we propose a method to convert such an one-dimensional comparison approach into a two- \ldots{}",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "https://dl.acm.org/loi/tcbb",
}

@Article{Chowdhury:2020:DEA,
  author =       "Hussain Ahmed Chowdhury and Dhruba Kumar Bhattacharyya and Jugal Kumar Kalita",
  title =        "Differential Expression Analysis of {RNA}-seq Reads: Overview, Taxonomy, and Tools",
  journal =      j-TCBB,
  volume =       "17",
  number =       "2",
  pages =        "566--586",
  month =        mar,
  year =         "2020",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2018.2873010",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Jun 10 07:29:49 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2873010",
  abstract =     "Analysis of RNA-sequence (RNA-seq) data is widely used in transcriptomic studies and it has many applications. We review RNA-seq data analysis from RNA-seq reads to the results of differential expression analysis.
In addition, we perform a descriptive \ldots{}",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "https://dl.acm.org/loi/tcbb",
}

@Article{Wang:2020:DLS,
  author =       "Hang Wang and Jianing Xi and Minghui Wang and Ao Li",
  title =        "Dual-Layer Strengthened Collaborative Topic Regression Modeling for Predicting Drug Sensitivity",
  journal =      j-TCBB,
  volume =       "17",
  number =       "2",
  pages =        "587--598",
  month =        mar,
  year =         "2020",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2018.2864739",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Jun 10 07:29:49 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2864739",
  abstract =     "An effective way to facilitate the development of modern oncology precision medicine is the systematical analysis of the known drug sensitivities that have emerged in recent years. Meanwhile, the screening of drug response in cancer cell lines provides an \ldots{}",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "https://dl.acm.org/loi/tcbb",
}

@Article{Li:2020:EBE,
  author =       "Lishuang Li and Yang Liu and Meiyue Qin",
  title =        "Extracting Biomedical Events with Parallel Multi-Pooling Convolutional Neural Networks",
  journal =      j-TCBB,
  volume =       "17",
  number =       "2",
  pages =        "599--607",
  month =        mar,
  year =         "2020",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2018.2868078",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Jun 10 07:29:49 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2868078",
  abstract =     "Biomedical event extraction is important for medical research and disease prevention, which has attracted much attention in recent years.
Traditionally, most of the state-of-the-art systems have been based on shallow machine learning methods, which \ldots{}",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "https://dl.acm.org/loi/tcbb",
}

@Article{Saribudak:2020:GEH,
  author =       "Aydin Saribudak and Adarsha A. Subick and Na Hyun Kim and Joshua A. Rutta and M. {\"U}mit Uyar",
  title =        "Gene Expressions, Hippocampal Volume Loss, and {MMSE} Scores in Computation of Progression and Pharmacologic Therapy Effects for {Alzheimer}'s Disease",
  journal =      j-TCBB,
  volume =       "17",
  number =       "2",
  pages =        "608--622",
  month =        mar,
  year =         "2020",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2018.2870363",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Jun 10 07:29:49 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2870363",
  abstract =     "We build personalized relevance parameterization method (PReP-AD) based on artificial intelligence (AI) techniques to compute Alzheimer's disease (AD) progression for patients at the mild cognitive impairment \ldots{}",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "https://dl.acm.org/loi/tcbb",
}

@Article{Xiao:2020:ILM,
  author =       "Qiu Xiao and Jiawei Luo and Cheng Liang and Guanghui Li and Jie Cai and Pingjian Ding and Ying Liu",
  title =        "Identifying {lncRNA} and {mRNA} Co-Expression Modules from Matched Expression Data in Ovarian Cancer",
  journal =      j-TCBB,
  volume =       "17",
  number =       "2",
  pages =        "623--634",
  month =        mar,
  year =         "2020",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2018.2864129",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Jun 10 07:29:49 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2864129",
  abstract =     "Long non-coding RNAs (lncRNAs) have been shown to be involved in multiple biological processes and play critical roles in tumorigenesis. Numerous lncRNAs have been discovered in diverse species, but the functions of most lncRNAs still remain unclear. \ldots{}",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "https://dl.acm.org/loi/tcbb",
}

@Article{Karimnezhad:2020:IPK,
  author =       "Ali Karimnezhad and David R. Bickel",
  title =        "Incorporating Prior Knowledge about Genetic Variants into the Analysis of Genetic Association Data: an Empirical {Bayes} Approach",
  journal =      j-TCBB,
  volume =       "17",
  number =       "2",
  pages =        "635--646",
  month =        mar,
  year =         "2020",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2018.2865420",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Jun 10 07:29:49 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2865420",
  abstract =     "In a genome-wide association study (GWAS), the probability that a single nucleotide polymorphism (SNP) is not associated with a disease is its local false discovery rate (LFDR). The LFDR for each SNP is relative to a reference class of SNPs. For example, \ldots{}",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "https://dl.acm.org/loi/tcbb",
}

@Article{Zhang:2020:ISO,
  author =       "Rongrong Zhang and Ming Hu and Yu Zhu and Zhaohui Qin and Ke Deng and Jun S.
Liu",
  title =        "Inferring Spatial Organization of Individual Topologically Associated Domains via Piecewise Helical Model",
  journal =      j-TCBB,
  volume =       "17",
  number =       "2",
  pages =        "647--656",
  month =        mar,
  year =         "2020",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2018.2865349",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Jun 10 07:29:49 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2865349",
  abstract =     "The recently developed Hi-C technology enables a genome-wide view of chromosome spatial organizations, and has shed deep insights into genome structure and genome function. However, multiple sources of uncertainties make downstream data analysis and \ldots{}",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "https://dl.acm.org/loi/tcbb",
}

@Article{Cao:2020:PEF,
  author =       "Zhen Cao and Shihua Zhang",
  title =        "Probe Efficient Feature Representation of Gapped {$K$}-mer Frequency Vectors from Sequences Using Deep Neural Networks",
  journal =      j-TCBB,
  volume =       "17",
  number =       "2",
  pages =        "657--667",
  month =        mar,
  year =         "2020",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2018.2868071",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Jun 10 07:29:49 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2868071",
  abstract =     "Gapped k-mers frequency vectors (gkm-fv) has been presented for extracting sequence features. Coupled with support vector machine (gkm-SVM), gkm-fvs have been used to achieve effective sequence-based predictions.
However, the huge computation of a large \ldots{}",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "https://dl.acm.org/loi/tcbb",
}

@Article{Fergus:2020:UDL,
  author =       "Paul Fergus and Casimiro Curbelo Monta{\~n}ez and Basma Abdulaimma and Paulo Lisboa and Carl Chalmers and Beth Pineles",
  title =        "Utilizing Deep Learning and Genome Wide Association Studies for Epistatic-Driven Preterm Birth Classification in {African--American} Women",
  journal =      j-TCBB,
  volume =       "17",
  number =       "2",
  pages =        "668--678",
  month =        mar,
  year =         "2020",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2018.2868667",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Jun 10 07:29:49 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2868667",
  abstract =     "Genome-Wide Association Studies (GWAS) are used to identify statistically significant genetic variants in case-control studies. The main objective is to find single nucleotide polymorphisms (SNPs) that influence a particular phenotype (i.e., disease trait).
\ldots{}",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "https://dl.acm.org/loi/tcbb",
}

@Article{Zhang:2020:WSC,
  author =       "Qinhu Zhang and Lin Zhu and Wenzheng Bao and De-Shuang Huang",
  title =        "Weakly-Supervised Convolutional Neural Network Architecture for Predicting Protein--{DNA} Binding",
  journal =      j-TCBB,
  volume =       "17",
  number =       "2",
  pages =        "679--689",
  month =        mar,
  year =         "2020",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2018.2864203",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Jun 10 07:29:49 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2864203",
  abstract =     "Although convolutional neural networks (CNN) have outperformed conventional methods in predicting the sequence specificities of protein-DNA binding in recent years, they do not take full advantage of the intrinsic weakly-supervised information of DNA \ldots{}",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "https://dl.acm.org/loi/tcbb",
}

@Article{Mallik:2020:WWC,
  author =       "Saurav Mallik and Sanghamitra Bandyopadhyay",
  title =        "{WeCoMXP}: Weighted Connectivity Measure Integrating Co-Methylation, Co-Expression and Protein--Protein Interactions for Gene-Module Detection",
  journal =      j-TCBB,
  volume =       "17",
  number =       "2",
  pages =        "690--703",
  month =        mar,
  year =         "2020",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2018.2868348",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Jun 10 07:29:49 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1109/TCBB.2018.2868348",
  abstract =     "The identification of modules (groups of several tightly interconnected genes) in gene interaction network is an essential task for better
understanding of the architecture of the whole network. In this article, we develop a novel weighted connectivity \ldots{}",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "https://dl.acm.org/loi/tcbb",
}

@Article{Fodeh:2020:CPC,
  author =       "Samah J. Fodeh and Taihua Li and Haya Jarad and Basmah Safdar",
  title =        "Classification of Patients with Coronary Microvascular Dysfunction",
  journal =      j-TCBB,
  volume =       "17",
  number =       "2",
  pages =        "704--711",
  month =        mar,
  year =         "2020",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2019.2914442",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Wed Jun 10 07:29:49 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1109/TCBB.2019.2914442",
  abstract =     "While coronary microvascular dysfunction (CMD) is a major cause of ischemia, it is very challenging to diagnose due to lack of CMD-specific screening measures.
CMD has been identified as one of the five priority areas of investigation in a 2014 National \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zheng:2020:GEI, author = "Jie Zheng and Jinyan Li and Yun Zheng", title = "Guest Editorial for the {29th International Conference on Genome Informatics (GIW 2018)}", journal = j-TCBB, volume = "17", number = "3", pages = "726--727", month = may, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.2978606", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:32 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.2978606", abstract = "The six papers in this special section were presented at the 29th International Conference on Genome Informatics (GIW 2018) that was held at Kunming University of Science and Technology, Kunming, China on December 3--5, 2018.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Liao:2020:ETA, author = "Xingyu Liao and Min Li and You Zou and Fang-Xiang Wu and Yi Pan and Jianxin Wang", title = "An Efficient Trimming Algorithm based on Multi-Feature Fusion Scoring Model for {NGS} Data", journal = j-TCBB, volume = "17", number = "3", pages = "728--738", month = may, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2897558", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:32 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2897558", abstract = "Next-generation sequencing (NGS) has enabled an exponential growth rate of sequencing data. 
However, several sequence artifacts, including error reads (base calling errors and small insertions or deletions) and poor quality reads, which can impose \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wang:2020:GDI, author = "Shunfang Wang and Zicheng Cao and Mingyuan Li and Yaoting Yue", title = "{G-DipC}: an Improved Feature Representation Method for Short Sequences to Predict the Type of Cargo in Cell-Penetrating Peptides", journal = j-TCBB, volume = "17", number = "3", pages = "739--747", month = may, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2930993", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:32 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2930993", abstract = "Cell-penetrating peptides (CPPs) are functional short peptides with high carrying capacity. CPP sequences with targeting functions for the highly efficient delivery of drugs to target cells. 
In this paper, which is focused on the prediction of the cargo \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Liu:2020:SSS, author = "Yong Liu and Min Wu and Chenghao Liu and Xiao-Li Li and Jie Zheng", title = "{SL$^2$MF}: Predicting Synthetic Lethality in Human Cancers via Logistic Matrix Factorization", journal = j-TCBB, volume = "17", number = "3", pages = "748--757", month = may, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2909908", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:32 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2909908", abstract = "Synthetic lethality (SL) is a promising concept for novel discovery of anti-cancer drug targets. However, wet-lab experiments for detecting SLs are faced with various challenges, such as high cost, low consistency across platforms, or cell lines. \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Song:2020:EBM, author = "Junrong Song and Wei Peng and Feng Wang", title = "An Entropy-Based Method for Identifying Mutual Exclusive Driver Genes in Cancer", journal = j-TCBB, volume = "17", number = "3", pages = "758--768", month = may, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2897931", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:32 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2897931", abstract = "Cancer in essence is a complex genomic alteration disease which is caused by the somatic mutations during the lifetime. 
According to previous researches, the first step to overcome cancer is to identify driver genes which can promote carcinogenesis. \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Peng:2020:MRA, author = "Jiajie Peng and Linjiao Zhu and Yadong Wang and Jin Chen", title = "Mining Relationships among Multiple Entities in Biological Networks", journal = j-TCBB, volume = "17", number = "3", pages = "769--776", month = may, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2904965", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:32 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2904965", abstract = "Identifying topological relationships among multiple entities in biological networks is critical towards the understanding of the organizational principles of network functionality. 
Theoretically, this problem can be solved using minimum Steiner tree \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yao:2020:ADP, author = "Heng Yao and Yunjia Shi and Jihong Guan and Shuigeng Zhou", title = "Accurately Detecting Protein Complexes by Graph Embedding and Combining Functions with Interactions", journal = j-TCBB, volume = "17", number = "3", pages = "777--787", month = may, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2897769", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:32 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2897769", abstract = "Identifying protein complexes is helpful for understanding cellular functions and designing drugs. In the last decades, many computational methods have been proposed based on detecting dense subgraphs or subnetworks in Protein-Protein Interaction Networks \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ma:2020:CIH, author = "Yuanyuan Ma and Xiaohua Hu and Tingting He and Xingpeng Jiang", title = "Clustering and Integrating of Heterogeneous Microbiome Data by Joint Symmetric Nonnegative Matrix Factorization with {Laplacian} Regularization", journal = j-TCBB, volume = "17", number = "3", pages = "788--795", month = may, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2756628", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:32 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2017.2756628", abstract = "Many datasets that exists in the real world are often comprised 
of different representations or views which provide complementary information to each other. To integrate information from multiple views, data integration approaches such as nonnegative \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Tan:2020:EEH, author = "Renjie Tan and Jixuan Wang and Xiaoliang Wu and Liran Juan and Tianjiao Zhang and Rui Ma and Qing Zhan and Tao Wang and Shuilin Jin and Qinghua Jiang and Yadong Wang", title = "{ERDS-Exome}: a Hybrid Approach for Copy Number Variant Detection from Whole-Exome Sequencing Data", journal = j-TCBB, volume = "17", number = "3", pages = "796--803", month = may, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2758779", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:32 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2017.2758779", abstract = "Copy number variants (CNVs) play important roles in human disease and evolution. With the rapid development of next-generation sequencing technologies, many tools have been developed for inferring CNVs based on whole-exome sequencing (WES) data. 
However, \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Peng:2020:HSC, author = "Shaoliang Peng and Xiaoyu Zhang and Wenhe Su and Dong Dong and Yutong Lu and Xiangke Liao and Kai Lu and Canqun Yang and Jie Liu and Weiliang Zhu and Dongqing Wei", title = "High-Scalable Collaborated Parallel Framework for Large-Scale Molecular Dynamic Simulation on {Tianhe-2} Supercomputer", journal = j-TCBB, volume = "17", number = "3", pages = "804--816", month = may, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2805709", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:32 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/super.bib; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2018.2805709", abstract = "Molecular dynamics (MD) is a computer simulation method of studying physical movements of atoms and molecules that provide detailed microscopic sampling on molecular scale. 
With the continuous efforts and improvements, MD simulation gained popularity in \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Li:2020:IPC, author = "Min Li and Xiangmao Meng and Ruiqing Zheng and Fang-Xiang Wu and Yaohang Li and Yi Pan and Jianxin Wang", title = "Identification of Protein Complexes by Using a Spatial and Temporal Active Protein Interaction Network", journal = j-TCBB, volume = "17", number = "3", pages = "817--827", month = may, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2749571", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:32 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2017.2749571", abstract = "The rapid development of proteomics and high-throughput technologies has produced a large amount of Protein-Protein Interaction (PPI) data, which makes it possible for considering dynamic properties of protein interaction networks (PINs) instead of static \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Rahman:2020:PPM, author = "Mohammad Arifur Rahman and Nathan LaPierre and Huzefa Rangwala", title = "Phenotype Prediction from Metagenomic Data Using Clustering and Assembly with Multiple Instance Learning {(CAMIL)}", journal = j-TCBB, volume = "17", number = "3", pages = "828--840", month = may, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2758782", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:32 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2017.2758782", abstract = "The recent advent of 
Metagenome Wide Association Studies (MGWAS) provides insight into the role of microbes on human health and disease. However, the studies present several computational challenges. In this paper, we demonstrate a novel, efficient, and \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Li:2020:ILM, author = "Lishuang Li and Yuxin Jiang", title = "Integrating Language Model and Reading Control Gate in {BLSTM-CRF} for Biomedical Named Entity Recognition", journal = j-TCBB, volume = "17", number = "3", pages = "841--846", month = may, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2868346", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:32 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2018.2868346", abstract = "Biomedical named entity recognition (Bio-NER) is an important preliminary step for many biomedical text mining tasks. 
The current mainstream methods for NER are based on the neural networks to avoid the complex hand-designed features derived from various \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wu:2020:MME, author = "Binbin Wu and Min Li and Xingyu Liao and Junwei Luo and Fang-Xiang Wu and Yi Pan and Jianxin Wang", title = "{MEC}: Misassembly Error Correction in Contigs based on Distribution of Paired-End Reads and Statistics of {GC-contents}", journal = j-TCBB, volume = "17", number = "3", pages = "847--857", month = may, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2876855", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:32 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2018.2876855", abstract = "The de novo assembly tools aim at reconstructing genomes from next-generation sequencing (NGS) data. However, the assembly tools usually generate a large amount of contigs containing many misassemblies, which are caused by problems of repetitive regions, \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zheng:2020:IIC, author = "Huiru Zheng and Haiying Wang and Richard J. 
Dewhurst and Rainer Roehe", title = "Improving the Inference of Co-Occurrence Networks in the Bovine Rumen Microbiome", journal = j-TCBB, volume = "17", number = "3", pages = "858--867", month = may, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2879342", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:32 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2018.2879342", abstract = "The importance of the composition and signature of rumen microbial communities has gained increasing attention. One of the key techniques was to infer co-abundance networks through correlation analysis based on relative abundances. While substantial \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zare:2020:PSC, author = "Fatima Zare and Sardar Ansari and Kayvan Najarian and Sheida Nabavi", title = "Preprocessing Sequence Coverage Data for More Precise Detection of Copy Number Variations", journal = j-TCBB, volume = "17", number = "3", pages = "868--876", month = may, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2869738", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:32 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2018.2869738", abstract = "Copy number variation (CNV) is a type of genomic/genetic variation that plays an important role in phenotypic diversity, evolution, and disease susceptibility. 
Next generation sequencing (NGS) technologies have created an opportunity for more accurate \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Luo:2020:GGF, author = "Junwei Luo and Jianxin Wang and Juan Shang and Huimin Luo and Min Li and Fang-Xiang Wu and Yi Pan", title = "{GapReduce}: a Gap Filling Algorithm Based on Partitioned Read Sets", journal = j-TCBB, volume = "17", number = "3", pages = "877--886", month = may, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2789909", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:32 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2018.2789909", abstract = "With the advances in technologies of sequencing and assembly, draft sequences of more and more genomes are available. However, there commonly exist gaps in these draft sequences which influence various downstream analysis of biological studies. 
Gap \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Hu:2020:DRL, author = "Haigen Hu and Qiu Guan and Shengyong Chen and Zhiwei Ji and Yao Lin", title = "Detection and Recognition for Life State of Cell Cancer Using Two-Stage Cascade {CNNs}", journal = j-TCBB, volume = "17", number = "3", pages = "887--898", month = may, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2780842", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:32 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2017.2780842", abstract = "Cancer cell detection and its stages recognition of life cycle are an important step to analyze cellular dynamics in the automation of cell based-experiments. In this work, a two-stage hierarchical method is proposed to detect and recognize different life \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Liu:2020:BLE, author = "Yuansheng Liu and Chaowang Lan and Michael Blumenstein and Jinyan Li", title = "Bi-Level Error Correction for {PacBio} Long Reads", journal = j-TCBB, volume = "17", number = "3", pages = "899--905", month = may, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2780832", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:32 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2017.2780832", abstract = "The latest sequencing technologies such as the Pacific Biosciences (PacBio) and Oxford Nanopore machines can generate long reads at the length of thousands of nucleic bases which is much longer than the reads at the 
length of hundreds generated by \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ni:2020:CDS, author = "Peng Ni and Jianxin Wang and Ping Zhong and Yaohang Li and Fang-Xiang Wu and Yi Pan", title = "Constructing Disease Similarity Networks Based on Disease Module Theory", journal = j-TCBB, volume = "17", number = "3", pages = "906--915", month = may, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2817624", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:32 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2018.2817624", abstract = "Quantifying the associations between diseases is now playing an important role in modern biology and medicine. Actually discovering associations between diseases could help us gain deeper insights into pathogenic mechanisms of complex diseases, thus could \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Chen:2020:FEC, author = "Zhi-Zhong Chen and Youta Harada and Yuna Nakamura and Lusheng Wang", title = "Faster Exact Computation of {rSPR} Distance via Better Approximation", journal = j-TCBB, volume = "17", number = "3", pages = "916--929", month = may, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2878731", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:32 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2018.2878731", abstract = "Due to hybridization events in evolution, studying two different genes of a set of species may yield two related but different phylogenetic trees for the set of species. 
In this case, we want to measure the dissimilarity of the two trees. The rooted \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{He:2020:IAC, author = "Zaobo He and Jiguo Yu and Ji Li and Qilong Han and Guangchun Luo and Yingshu Li", title = "Inference Attacks and Controls on Genotypes and Phenotypes for Individual Genomic Data", journal = j-TCBB, volume = "17", number = "3", pages = "930--937", month = may, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2810180", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:32 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2018.2810180", abstract = "The rapid growth of DNA-sequencing technologies motivates more personalized and predictive genetic-oriented services, which further attract individuals to increasingly release their genome information to learn about personalized medicines, disease \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhao:2020:INF, author = "Jin Zhao and Haodi Feng and Daming Zhu and Chi Zhang and Ying Xu", title = "{IsoTree}: a New Framework for de novo Transcriptome Assembly from {RNA-seq} Reads", journal = j-TCBB, volume = "17", number = "3", pages = "938--948", month = may, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2808350", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:32 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2018.2808350", abstract = "High-throughput sequencing of mRNA has made the deep and efficient probing of transcriptome more affordable. 
However, the vast amounts of short RNA-seq reads make de novo transcriptome assembly an algorithmic challenge. In this work, we present IsoTree, a \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2020:EMM, author = "Jingsong Zhang and Jianmei Guo and Ming Zhang and Xiangtian Yu and Xiaoqing Yu and Weifeng Guo and Tao Zeng and Luonan Chen", title = "Efficient Mining Multi-Mers in a Variety of Biological Sequences", journal = j-TCBB, volume = "17", number = "3", pages = "949--958", month = may, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2828313", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:32 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2018.2828313", abstract = "Counting the occurrence frequency of each k-mer in a biological sequence is a preliminary yet important step in many bioinformatics applications. 
However, most k-mer counting algorithms rely on a given k to produce single-length k-mers, which is \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Boukari:2020:ACT, author = "Fatima Boukari and Sokratis Makrogiannis", title = "Automated Cell Tracking Using Motion Prediction-Based Matching and Event Handling", journal = j-TCBB, volume = "17", number = "3", pages = "959--971", month = may, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2875684", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:32 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2018.2875684", abstract = "Automated cell segmentation and tracking enables the quantification of static and dynamic cell characteristics and is significant for disease diagnosis, treatment, drug development, and other biomedical applications. 
This paper introduces a method for \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wang:2020:CHS, author = "Lei Wang and Zhu-Hong You and De-Shuang Huang and Fengfeng Zhou", title = "Combining High Speed {ELM} Learning with a Deep Convolutional Neural Network Feature Encoding for Predicting {Protein-RNA} Interactions", journal = j-TCBB, volume = "17", number = "3", pages = "972--980", month = may, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2874267", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:32 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2018.2874267", abstract = "Emerging evidence has shown that RNA plays a crucial role in many cellular processes, and their biological functions are primarily achieved by binding with a variety of proteins. High-throughput biological experiments provide a lot of valuable information \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Kang:2020:CGW, author = "Qiwen Kang and Neil Moore and Christopher L. 
Schardl and Ruriko Yoshida", title = "{CURatio}: Genome-Wide Phylogenomic Analysis Method Using Ratios of Total Branch Lengths", journal = j-TCBB, volume = "17", number = "3", pages = "981--989", month = may, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2878564", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:32 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2018.2878564", abstract = "Evolutionary hypotheses provide important underpinnings of biological and medical sciences, and comprehensive, genome-wide understanding of evolutionary relationships among organisms are needed to test and refine such hypotheses. Theory and empirical \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Som-In:2020:EPS, author = "Sarawoot Som-In and Warangkhana Kimpan", title = "Enhancing of Particle Swarm Optimization Based Method for Multiple Motifs Detection in {DNA} Sequences Collections", journal = j-TCBB, volume = "17", number = "3", pages = "990--998", month = may, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2872978", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:32 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2018.2872978", abstract = "Genome sequence data consists of DNA sequences or input sequences. 
Each one includes nucleotides with chemical structures presented as characters: `A', `C', `G', and `T', and groups of motif sequences, called Transcription Factor Binding Sites (TFBSs), \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Bakhteh:2020:IMS, author = "Somayeh Bakhteh and Alireza Ghaffari-Hadigheh and Nader Chaparzadeh", title = "Identification of Minimum Set of Master Regulatory Genes in Gene Regulatory Networks", journal = j-TCBB, volume = "17", number = "3", pages = "999--1009", month = may, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2875692", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:32 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2018.2875692", abstract = "Identification of master regulatory genes is one of the primary challenges in systems biology. The minimum dominating set problem is a powerful paradigm in analyzing such complex networks. 
In these models, genes stand as nodes and their interactions are \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Vundavilli:2020:SDE, author = "Haswanth Vundavilli and Aniruddha Datta and Chao Sima and Jianping Hua and Rosana Lopes and Michael Bittner", title = "In Silico Design and Experimental Validation of Combination Therapy for Pancreatic Cancer", journal = j-TCBB, volume = "17", number = "3", pages = "1010--1018", month = may, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2872573", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:32 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2018.2872573", abstract = "The number of deaths associated with Pancreatic Cancer has been on the rise in the United States making it an especially dreaded disease. 
The overall prognosis for pancreatic cancer patients continues to be grim because of the complexity of the disease at \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Huang:2020:GES, author = "De-Shuang Huang and Vitoantonio Bevilacqua and Michael Gromiha", title = "Guest Editorial for Special Section on the {14th International Conference on Intelligent Computing (ICIC)}", journal = j-TCBB, volume = "17", number = "5", pages = "1474--1475", month = sep, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.2989800", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:34 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.2989800", abstract = "The papers in this special section were presented at the Fourteenth International Conference on Intelligent Computing (ICIC) held in Wuhan, China, on August 15--18, 2018.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Lee:2020:CPP, author = "Wook Lee and Kyungsook Han", title = "Constructive Prediction of Potential {RNA} Aptamers for a Protein Target", journal = j-TCBB, volume = "17", number = "5", pages = "1476--1482", month = sep, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2951114", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:34 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2951114", abstract = "Aptamers are short single-stranded nucleic acids that bind to target molecules with high affinity and selectivity. 
Aptamers are generally identified in vitro by performing SELEX (systematic evolution of ligands by exponential enrichment). Complementing \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Shen:2020:CNP, author = "Zhen Shen and Su-Ping Deng and De-Shuang Huang", title = "Capsule Network for Predicting {RNA}--Protein Binding Preferences Using Hybrid Feature", journal = j-TCBB, volume = "17", number = "5", pages = "1483--1492", month = sep, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2943465", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:34 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2943465", abstract = "RNA-Protein binding is involved in many different biological processes. With the progress of technology, more and more data are available for research. 
Based on these data, many prediction methods have been proposed to predict RNA-Protein binding \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Jia:2020:PIM, author = "Huiqiang Jia and Haichao Wei and Daming Zhu and Jingjing Ma and Hai Yang and Ruizhi Wang and Xianzhong Feng", title = "{PASA}: Identifying More Credible Structural Variants of {Hedou12}", journal = j-TCBB, volume = "17", number = "5", pages = "1493--1503", month = sep, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2934463", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:34 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2934463", abstract = "Although plenty of structural variant detecting approaches for human genomes can be looked up in the literatures, little has been acknowledged on the effectiveness of those structural variant softwares for plant genomes. 
Moreover, it has been demonstrated \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Liu:2020:JIV, author = "Jian Liu and Zhi Qu and Mo Yang and Jialiang Sun and Shuhui Su and Lei Zhang", title = "Jointly Integrating {VCF}-Based Variants and {OWL}-Based Biomedical Ontologies in {MongoDB}", journal = j-TCBB, volume = "17", number = "5", pages = "1504--1515", month = sep, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2951137", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:34 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2951137", abstract = "The development of the next-generation sequencing (NGS) technologies has led to massive amounts of VCF (Variant Call Format) files, which have been the standard formats developed with 1000 Genomes Project. At the same time, with the widespread use of \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Hu:2020:LMN, author = "Pengwei Hu and Yu-An Huang and Keith C. C. Chan and Zhu-Hong You", title = "Learning Multimodal Networks From Heterogeneous Data for Prediction of {lncRNA--miRNA} Interactions", journal = j-TCBB, volume = "17", number = "5", pages = "1516--1524", month = sep, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2957094", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:34 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2957094", abstract = "Long noncoding RNAs (lncRNAs) is an important class of non-protein coding RNAs. 
They have recently been found to potentially be able to act as a regulatory molecule in some important biological processes. MicroRNAs (miRNAs) have been confirmed to be \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Lin:2020:ECH, author = "Xiaoli Lin and Xiaolong Zhang and Xin Xu", title = "Efficient Classification of Hot Spots and Hub Protein Interfaces by Recursive Feature Elimination and Gradient Boosting", journal = j-TCBB, volume = "17", number = "5", pages = "1525--1534", month = sep, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2931717", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:34 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2931717", abstract = "Proteins are not isolated biological molecules, which have the specific three-dimensional structures and interact with other proteins to perform functions. A small number of residues (hot spots) in protein-protein interactions (PPIs) play the vital role \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Hind:2020:NAD, author = "Jade Hind and Paulo Lisboa and Abir J. 
Hussain and Dhiya Al-Jumeily", title = "A Novel Approach to Detecting Epistasis using Random Sampling Regularisation", journal = j-TCBB, volume = "17", number = "5", pages = "1535--1545", month = sep, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2948330", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:34 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2948330", abstract = "Epistasis is a progressive approach that complements the `common disease, common variant' hypothesis that highlights the potential for connected networks of genetic variants collaborating to produce a phenotypic expression. Epistasis is commonly performed \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Li:2020:UWE, author = "Jianqiang Li and Xiaofeng Shi and Zhu-Hong You and Hai-Cheng Yi and Zhuangzhuang Chen and Qiuzhen Lin and Min Fang", title = "Using Weighted Extreme Learning Machine Combined With Scale-Invariant Feature Transform to Predict Protein-Protein Interactions From Protein Evolutionary Information", journal = j-TCBB, volume = "17", number = "5", pages = "1546--1554", month = sep, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.2965919", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:34 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.2965919", abstract = "Protein-Protein Interactions (PPIs) play an irreplaceable role in biological activities of organisms. 
Although many high-throughput methods are used to identify PPIs from different kinds of organisms, they have some shortcomings, such as high cost and \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Shahdoust:2020:NBC, author = "Maryam Shahdoust and Hossein Mahjub and Hamid Pezeshk and Mehdi Sadeghi", title = "A Network-Based Comparison Between Molecular Apocrine Breast Cancer Tumor and Basal and Luminal Tumors by Joint Graphical Lasso", journal = j-TCBB, volume = "17", number = "5", pages = "1555--1562", month = sep, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2911074", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:34 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2911074", abstract = "Joint graphical lasso (JGL) approach is a Gaussian graphical model to estimate multiple graphical models corresponding to distinct but related groups. Molecular apocrine (MA) breast cancer tumor has similar characteristics to luminal and basal subtypes. 
\ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Guo:2020:NEE, author = "Qian Guo and Tianhong Pan and Shan Chen and Xiaobo Zou and Dorothy Yu Huang", title = "A Novel Edge Effect Detection Method for Real-Time Cellular Analyzer Using Functional Principal Component Analysis", journal = j-TCBB, volume = "17", number = "5", pages = "1563--1572", month = sep, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2903094", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:34 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2903094", abstract = "Real-time cellular analyzer (RTCA) has been generally applied to test the cytotoxicity of chemicals. However, several factors impact the experimental quality. A non-negligible factor is the abnormal time-dependent cellular response curves (TCRCs) of the \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Sharma:2020:ESL, author = "Nirmala Sharma and Harish Sharma and Ajay Sharma", title = "An Effective Solution for Large Scale Single Machine Total Weighted Tardiness Problem using Lunar Cycle Inspired Artificial Bee Colony Algorithm", journal = j-TCBB, volume = "17", number = "5", pages = "1573--1581", month = sep, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2897302", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:34 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2897302", abstract = "Single machine total weighted tardiness problem (SMTWTP) is one of the fundamental combinatorial 
optimization problems. The problem consists of a set of independent jobs with distinct processing times, weights, and due dates to be scheduled on a single \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Sarkar:2020:EAI, author = "Aisharjya Sarkar and Yilmaz Atay and Alana Lorraine Erickson and Ivan Arisi and Cesare Saltini and Tamer Kahveci", title = "An Efficient Algorithm for Identifying Mutated Subnetworks Associated with Survival in Cancer", journal = j-TCBB, volume = "17", number = "5", pages = "1582--1594", month = sep, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2911069", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:34 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2911069", abstract = "Protein-protein interaction (PPI) network models interconnections between protein-encoding genes. A group of proteins that perform similar functions are often connected to each other in the PPI network. 
The corresponding genes form pathways or functional \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yan:2020:BPM, author = "Cheng Yan and Guihua Duan and Fang-Xiang Wu and Yi Pan and Jianxin Wang", title = "{BRWMDA}: Predicting Microbe-Disease Associations Based on Similarities and Bi-Random Walk on Disease and Microbe Networks", journal = j-TCBB, volume = "17", number = "5", pages = "1595--1604", month = sep, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2907626", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:34 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2907626", abstract = "Many current studies have evidenced that microbes play important roles in human diseases. Therefore, discovering the associations between microbes and diseases is beneficial to systematically understanding the mechanisms of diseases, diagnosing, and \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Peng:2020:CNB, author = "Chen Peng and Yang Zheng and De-Shuang Huang", title = "Capsule Network Based Modeling of Multi-omics Data for Discovery of Breast Cancer-Related Genes", journal = j-TCBB, volume = "17", number = "5", pages = "1605--1612", month = sep, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2909905", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:34 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2909905", abstract = "Breast cancer is one of the most common cancers all over the world, which bring about more than 450,000
deaths each year. Although this malignancy has been extensively studied by a large number of researchers, its prognosis is still poor. Since \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yeganeh:2020:CDA, author = "Pourya Naderi Yeganeh and M. Taghi Mostafavi", title = "Causal Disturbance Analysis: a Novel Graph Centrality Based Method for Pathway Enrichment Analysis", journal = j-TCBB, volume = "17", number = "5", pages = "1613--1624", month = sep, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2907246", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:34 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2907246", abstract = "Pathway enrichment analysis models (PEM) are the premier methods for interpreting gene expression profiles from high-throughput experiments. PEM often use a priori background knowledge to infer the underlying biological functions and mechanisms. A \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Filipovic:2020:CNM, author = "Ji{\v{r}}{\'\i} Filipovi{\v{c}} and Ond{\v{r}}ej V{\'a}vra and Jan Plh{\'a}k and David Bedn{\'a}{\v{r}} and S{\'e}rgio M.
Marques and Jan Brezovsk{\'y} and Lud{\v{e}}k Matyska and Ji{\v{r}}{\'\i} Damborsk{\'y}", title = "{CaverDock}: a Novel Method for the Fast Analysis of Ligand Transport", journal = j-TCBB, volume = "17", number = "5", pages = "1625--1638", month = sep, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2907492", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:34 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2907492", abstract = "Here we present a novel method for the analysis of transport processes in proteins and its implementation called CaverDock. Our method is based on a modified molecular docking algorithm. It iteratively places the ligand along the access tunnel in such a \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zeng:2020:DCF, author = "Xiangxiang Zeng and Yinglai Lin and Yuying He and Linyuan L{\"u} and Xiaoping Min and Alfonso Rodr{\'\i}guez-Pat{\'o}n", title = "Deep Collaborative Filtering for Prediction of Disease Genes", journal = j-TCBB, volume = "17", number = "5", pages = "1639--1647", month = sep, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2907536", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:34 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2907536", abstract = "Accurate prioritization of potential disease genes is a fundamental challenge in biomedical research. Various algorithms have been developed to solve such problems. Inductive Matrix Completion (IMC) is one of the most reliable models for its well-
\ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ranjan:2020:DRF, author = "Ashish Ranjan and Md Shah Fahad and David Fern{\'a}ndez-Baca and Akshay Deepak and Sudhakar Tripathi", title = "Deep Robust Framework for Protein Function Prediction Using Variable-Length Protein Sequences", journal = j-TCBB, volume = "17", number = "5", pages = "1648--1659", month = sep, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2911609", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:34 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2911609", abstract = "The order of amino acids in a protein sequence enables the protein to acquire a conformation suitable for performing functions, thereby motivating the need to analyze these sequences for predicting functions. 
Although machine learning based approaches are \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Sharma:2020:ISE, author = "Sunildatt Sharma and Sanjeev Narayan Sharma and Rajiv Saxena", title = "Identification of Short Exons Disunited by a Short Intron in Eukaryotic {DNA} Regions", journal = j-TCBB, volume = "17", number = "5", pages = "1660--1670", month = sep, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2900040", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:34 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2900040", abstract = "Weak codon bias in short exons and separation by a short intron induces difficulty in extracting period-3 component that marks the presence of exonic regions. The annotation task of such short exons has been addressed in the proposed model independent \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wang:2020:IIE, author = "Min Wang and Ting-Zhu Huang and Jian Fang and Vince D. Calhoun and Yu-Ping Wang", title = "Integration of Imaging (epi)Genomics Data for the Study of Schizophrenia Using Group Sparse Joint Nonnegative Matrix Factorization", journal = j-TCBB, volume = "17", number = "5", pages = "1671--1681", month = sep, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2899568", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:34 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2899568", abstract = "Schizophrenia (SZ) is a complex disease. 
Single nucleotide polymorphism (SNP), brain activity measured by functional magnetic resonance imaging (fMRI) and DNA methylation are all important biomarkers that can be used for the study of SZ. To our knowledge, \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhao:2020:MNM, author = "Lan Zhao and Hong Yan", title = "{MCNF}: a Novel Method for Cancer Subtyping by Integrating Multi-Omics and Clinical Data", journal = j-TCBB, volume = "17", number = "5", pages = "1682--1690", month = sep, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2910515", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:34 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2910515", abstract = "In the age of personalized medicine, there is a great need to classify cancer (from the same organ site) into homogeneous subtypes. Recent technology advancements in genome-wide molecular profiling have made it possible to profiling multiple molecular \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Pichene:2020:MVP, author = "Matthieu Pichen{\'e} and Sucheendra K. 
Palaniappan and Eric Fabre and Blaise Genest", title = "Modeling Variability in Populations of Cells Using Approximated Multivariate Distributions", journal = j-TCBB, volume = "17", number = "5", pages = "1691--1702", month = sep, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2904276", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:34 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2904276", abstract = "We are interested in studying the evolution of large homogeneous populations of cells, where each cell is assumed to be composed of a group of biological players (species) whose dynamics is governed by a complex biological pathway, identical for all \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2020:ODP, author = "Yuan Zhang and Haihong Liu and Zhouhong Li and Zhonghua Miao and Jin Zhou", title = "Oscillatory Dynamics of {p53-Mdm2} Circuit in Response to {DNA} Damage Caused by Ionizing Radiation", journal = j-TCBB, volume = "17", number = "5", pages = "1703--1713", month = sep, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2899574", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:34 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2899574", abstract = "Although the dynamical behavior of the p53-Mdm2 loop has been extensively studied, the understanding of the mechanism underlying the regulation of this pathway still remains limited. 
Herein, we developed an integrated model with five basic components and \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wu:2020:PNC, author = "Sijia Wu and Xiaoming Wu and Jie Tian and Xiaobo Zhou and Liyu Huang", title = "{PredictFP2}: a New Computational Model to Predict Fusion Peptide Domain in All Retroviruses", journal = j-TCBB, volume = "17", number = "5", pages = "1714--1720", month = sep, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2898943", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:34 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2898943", abstract = "Fusion peptide (FP) is a pivotal domain for the entry of retrovirus into host cells to continue self-replication. The crucial role indicates that FP is a promising drug target for therapeutic intervention. A FP model proposed in our previous work is \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Fu:2020:PDM, author = "Laiyi Fu and Qinke Peng and Ling Chai", title = "Predicting {DNA} Methylation States with Hybrid Information Based Deep-Learning Model", journal = j-TCBB, volume = "17", number = "5", pages = "1721--1728", month = sep, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2909237", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:34 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2909237", abstract = "DNA methylation plays an important role in the regulation of some biological processes. 
Up to now, with the development of machine learning models, there are several sequence-based deep learning models designed to predict DNA methylation states, which \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Paul:2020:IRS, author = "Sushmita Paul and Madhumita", title = "{RFCM$^3$}: Computational Method for Identification of {miRNA--mRNA} Regulatory Modules in Cervical Cancer", journal = j-TCBB, volume = "17", number = "5", pages = "1729--1740", month = sep, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2910851", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:34 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2910851", abstract = "Cervical cancer is a leading severe malignancy throughout the world. Molecular processes and biomarkers leading to tumor progression in cervical cancer are either unknown or only partially understood. 
An increasing number of studies have shown that \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Shen:2020:RPB, author = "Zhen Shen and Su-Ping Deng and De-Shuang Huang", title = "{RNA-Protein} Binding Sites Prediction via Multi Scale Convolutional Gated Recurrent Unit Networks", journal = j-TCBB, volume = "17", number = "5", pages = "1741--1750", month = sep, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2910513", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:34 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2910513", abstract = "RNA-Protein binding plays important roles in the field of gene expression. With the development of high throughput sequencing, several conventional methods and deep learning-based methods have been proposed to predict the binding preference of RNA-protein \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Raza:2020:SPF, author = "Saad Raza and Ghulam Abbas and Syed Sikander Azam", title = "Screening Pipeline for Flavivirus Based Inhibitors for {Zika} Virus {NS1}", journal = j-TCBB, volume = "17", number = "5", pages = "1751--1761", month = sep, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2911081", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:34 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2911081", abstract = "In-silico pipeline is applied for identifying and designing novel inhibitors against ZIKV NS1 protein. 
Comparative molecular docking studies are performed to explore the binding of structurally diverse compounds to ZIKV NS1 by AutoDock/Vina and GOLD. The \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Kawano:2020:SFB, author = "Keisuke Kawano and Satoshi Koide and Chie Imamura", title = "{Seq2seq} Fingerprint with Byte-Pair Encoding for Predicting Changes in Protein Stability upon Single Point Mutation", journal = j-TCBB, volume = "17", number = "5", pages = "1762--1772", month = sep, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2908641", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:34 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2908641", abstract = "The engineering of stable proteins is crucial for various industrial purposes. Several machine learning methods have been developed to predict changes in the stability of proteins corresponding to single point mutations. To improve the prediction accuracy, \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yan:2020:GES, author = "Da Yan and Xin Gao and Samah J. Fodeh and Jake Y.
Chen", title = "Guest Editorial for Selected Papers from {BIOKDD 2018} and {DMBIH 2018}", journal = j-TCBB, volume = "17", number = "6", pages = "1832--1834", month = nov, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3020443", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:36 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3020443", abstract = "The papers in this special issue were presented at the 2018 17th International Workshop on Data Mining in Bioinformatics (BIOKDD), held in conjunction with the ACM SIGKDD International Conference on Knowledge Discovery and Data Mining. The Workshop was \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Sokolovsky:2020:DLA, author = "Michael Sokolovsky and Francisco Guerrero and Sarun Paisarnsrisomsuk and Carolina Ruiz and Sergio A. Alvarez", title = "Deep Learning for Automated Feature Discovery and Classification of Sleep Stages", journal = j-TCBB, volume = "17", number = "6", pages = "1835--1845", month = nov, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2912955", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:36 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2912955", abstract = "Convolutional neural networks (CNN) have demonstrated state-of-the-art classification results in image categorization, but have received comparatively little attention for classification of one-dimensional physiological signals. 
We design a deep CNN \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{McDermott:2020:DLB, author = "Matthew B. A. McDermott and Jennifer Wang and Wen-Ning Zhao and Steven D. Sheridan and Peter Szolovits and Isaac Kohane and Stephen J. Haggarty and Roy H. Perlis", title = "Deep Learning Benchmarks on {L1000} Gene Expression Data", journal = j-TCBB, volume = "17", number = "6", pages = "1846--1857", month = nov, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2910061", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:36 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2910061", abstract = "Gene expression data can offer deep, physiological insights beyond the static coding of the genome alone. We believe that realizing this potential requires specialized, high-capacity machine learning methods capable of using underlying biological \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zheng:2020:DDA, author = "Jingyi Zheng and Fushing Hsieh and Linqiang Ge", title = "A Data-Driven Approach to Predict and Classify Epileptic Seizures from Brain-Wide Calcium Imaging Video Data", journal = j-TCBB, volume = "17", number = "6", pages = "1858--1870", month = nov, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2895077", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:36 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2895077", abstract = "The prediction of epileptic seizures has been an essential problem of epilepsy study.
The calcium imaging video data images the whole brain-wide neurons activities with electrical discharge recorded by calcium fluorescence intensity (CFI). In this paper, \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Xu:2020:CCP, author = "Hongming Xu and Sunho Park and Tae Hyun Hwang", title = "Computerized Classification of Prostate Cancer {Gleason} Scores from Whole Slide Images", journal = j-TCBB, volume = "17", number = "6", pages = "1871--1882", month = nov, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2941195", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:36 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2941195", abstract = "Histological Gleason grading of tumor patterns is one of the most powerful prognostic predictors in prostate cancer. However, manual analysis and grading performed by pathologists are typically subjective and time-consuming. 
In this paper, we present an \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Pathak:2020:PSR, author = "Shreyasi Pathak and Jorit van Rossen and Onno Vijlbrief and Jeroen Geerdink and Christin Seifert and Maurice van Keulen", title = "Post-Structuring Radiology Reports of Breast Cancer Patients for Clinical Quality Assurance", journal = j-TCBB, volume = "17", number = "6", pages = "1883--1894", month = nov, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2914678", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:36 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2914678", abstract = "Hospitals often set protocols based on well defined standards to maintain the quality of patient reports. To ensure that the clinicians conform to the protocols, quality assurance of these reports is needed. Patient reports are currently written in free-text \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Noriega-Atala:2020:EIS, author = "Enrique Noriega-Atala and Paul D. Hein and Shraddha S. Thumsi and Zechy Wong and Xia Wang and Sean M. Hendryx and Clayton T. 
Morrison", title = "Extracting Inter-Sentence Relations for Associating Biological Context with Events in Biomedical Texts", journal = j-TCBB, volume = "17", number = "6", pages = "1895--1906", month = nov, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2904231", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:36 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2904231", abstract = "We present an analysis of the problem of identifying biological context and associating it with biochemical events described in biomedical texts. This constitutes a non-trivial, inter-sentential relation extraction task. We focus on biological context as \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Gibbs:2020:AVS, author = "Jonathon A. Gibbs and Michael P. Pound and Andrew P. French and Darren M. Wells and Erik H. Murchie and Tony P. Pridmore", title = "Active Vision and Surface Reconstruction for {$3$D} Plant Shoot Modelling", journal = j-TCBB, volume = "17", number = "6", pages = "1907--1917", month = nov, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2896908", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:36 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2896908", abstract = "Plant phenotyping is the quantitative description of a plant's physiological, biochemical, and anatomical status which can be used in trait selection and helps to provide mechanisms to link underlying genetics with yield. 
Here, an active vision-based \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Jing:2020:AAE, author = "Xiaoyang Jing and Qiwen Dong and Daocheng Hong and Ruqian Lu", title = "Amino Acid Encoding Methods for Protein Sequences: a Comprehensive Review and Assessment", journal = j-TCBB, volume = "17", number = "6", pages = "1918--1931", month = nov, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2911677", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:36 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2911677", abstract = "As the first step of machine-learning based protein structure and function prediction, the amino acid encoding play a fundamental role in the final success of those methods. Different from the protein sequence encoding, the amino acid encoding can be used \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Paul:2020:EAT, author = "Soumya Paul and Cui Su and Jun Pang and Andrzej Mizera", title = "An Efficient Approach Towards the Source-Target Control of {Boolean} Networks", journal = j-TCBB, volume = "17", number = "6", pages = "1932--1945", month = nov, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2915081", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:36 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2915081", abstract = "We study the problem of computing a minimal subset of nodes of a given asynchronous Boolean network that need to be perturbed in a single-step to drive its dynamics from an initial 
state to a target steady state (or {\em attractor\/}), which we \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Qingge:2020:AOS, author = "Letu Qingge and Killian Smith and Sean Jungst and Baihui Wang and Qing Yang and Binhai Zhu", title = "Approaching the One-Sided Exemplar Adjacency Number Problem", journal = j-TCBB, volume = "17", number = "6", pages = "1946--1954", month = nov, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2913834", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:36 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2913834", abstract = "The one-sided Exemplar Adjacency Number (EAN) is a known problem for computing the exemplar similarity between a generic linear genome ${\mathcal G}$ \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Karim:2020:BNB, author = "Mohammad Bozlul Karim and Ming Huang and Naoaki Ono and Shigehiko Kanaya and Md. 
Altaf-Ul-Amin", title = "{BiClusO}: a Novel Biclustering Approach and Its Application to Species-{VOC} Relational Data", journal = j-TCBB, volume = "17", number = "6", pages = "1955--1965", month = nov, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2914901", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:36 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2914901", abstract = "In this paper, we propose a novel biclustering approach called BiClusO. Biclustering can be applied to various types of bipartite data such as gene-condition or gene-disease relations. For example, we applied BiClusO to bipartite relations between species \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Liu:2020:BBP, author = "Guang-Hui Liu and Bei-Wei Zhang and Gang Qian and Bin Wang and Bo Mao and Isabelle Bichindaritz", title = "Bioimage-Based Prediction of Protein Subcellular Location in Human Tissue with Ensemble Features and Deep Networks", journal = j-TCBB, volume = "17", number = "6", pages = "1966--1980", month = nov, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2917429", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:36 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2917429", abstract = "Prediction of protein subcellular location has currently become a hot topic because it has been proven to be useful for understanding both the disease mechanisms and novel drug design. 
With the rapid development of automated microscopic imaging technology \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Paoletti:2020:DDR, author = "Nicola Paoletti and Kin Sum Liu and Hongkai Chen and Scott A. Smolka and Shan Lin", title = "Data-Driven Robust Control for a Closed-Loop Artificial Pancreas", journal = j-TCBB, volume = "17", number = "6", pages = "1981--1993", month = nov, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2912609", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:36 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2912609", abstract = "We present a fully closed-loop design for an artificial pancreas (AP) that regulates the delivery of insulin for the control of Type I diabetes. Our AP controller operates in a fully automated fashion, without requiring any manual interaction with the \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Liu:2020:EMX, author = "Jian Liu and Qiuru Liu and Lei Zhang and Shuhui Su and Yongzhuang Liu", title = "Enabling Massive {XML}-Based Biological Data Management in {HBase}", journal = j-TCBB, volume = "17", number = "6", pages = "1994--2004", month = nov, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2915811", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:36 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2915811", abstract = "Publishing biological data in XML formats is attractive for organizations who would like to provide their bioinformatics resources in 
an extensible and machine-readable format. In the era of big data, massive XML-based biological data management is \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Dutta:2020:EGC, author = "Pratik Dutta and Sriparna Saha and Saraansh Chopra and Varnika Miglani", title = "Ensembling of Gene Clusters Utilizing Deep Learning and Protein-Protein Interaction Information", journal = j-TCBB, volume = "17", number = "6", pages = "2005--2016", month = nov, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2918523", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:36 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2918523", abstract = "Cluster ensemble techniques aim to combine the outputs of multiple clustering algorithms to obtain a single consensus partitioning. 
The current paper reports about the development of a cluster ensemble based technique combining the concepts of \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Hu:2020:ICI, author = "Lun Hu and Pengwei Hu and Xin Luo and Xiaohui Yuan and Zhu-Hong You", title = "Incorporating the Coevolving Information of Substrates in Predicting {HIV-1} Protease Cleavage Sites", journal = j-TCBB, volume = "17", number = "6", pages = "2017--2028", month = nov, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2914208", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:36 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2914208", abstract = "Human immunodeficiency virus 1 (HIV-1) protease (PR) plays a crucial role in the maturation of the virus. 
The study of substrate specificity of HIV-1 PR as a new endeavor strives to increase our ability to understand how HIV-1 PR recognizes its various \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yu:2020:LBE, author = "Xinyi Yu and Wenge Rong and Jingshuang Liu and Deyu Zhou and Yuanxin Ouyang and Zhang Xiong", title = "{LSTM}-Based End-to-End Framework for Biomedical Event Extraction", journal = j-TCBB, volume = "17", number = "6", pages = "2029--2039", month = nov, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2916346", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:36 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2916346", abstract = "Biomedical event extraction plays an important role in the extraction of biological information from large-scale scientific publications. However, most state-of-the-art systems separate this task into several steps, which leads to cascading errors. 
In \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Kazemi:2020:MSA, author = "Ehsan Kazemi and Matthias Grossglauser", title = "{MPGM}: Scalable and Accurate Multiple Network Alignment", journal = j-TCBB, volume = "17", number = "6", pages = "2040--2052", month = nov, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2914050", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:36 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2914050", abstract = "Protein-protein interaction (PPI) network alignment is a canonical operation to transfer biological knowledge among species. The alignment of PPI-networks has many applications, such as the prediction of protein function, detection of conserved network \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2020:PEP, author = "Wei Zhang and Jia Xu and Xiufen Zou", title = "Predicting Essential Proteins by Integrating Network Topology, Subcellular Localization Information, Gene Expression Profile and {GO} Annotation Data", journal = j-TCBB, volume = "17", number = "6", pages = "2053--2061", month = nov, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2916038", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:36 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2916038", abstract = "Essential proteins are indispensable for maintaining normal cellular functions. Identification of essential proteins from Protein-protein interaction (PPI) networks has become a hot topic in recent years. 
Traditionally biological experimental based \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{He:2020:PCI, author = "Zengyou He and Can Zhao and Hao Liang and Bo Xu and Quan Zou", title = "Protein Complexes Identification with Family-Wise Error Rate Control", journal = j-TCBB, volume = "17", number = "6", pages = "2062--2073", month = nov, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2912602", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:36 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2912602", abstract = "The detection of protein complexes from protein-protein interaction network is a fundamental issue in bioinformatics and systems biology. To solve this problem, numerous methods have been proposed from different angles in the past decades. However, the \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Shrestha:2020:SMD, author = "Midusha Shrestha and Truong X. Tran and Bidhan Bhattarai and Marc L. Pusey and Ramazan S. 
Aygun", title = "Schema Matching and Data Integration with Consistent Naming on Protein Crystallization Screens", journal = j-TCBB, volume = "17", number = "6", pages = "2074--2085", month = nov, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2913368", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:36 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2913368", abstract = "The data representation as well as naming conventions used in commercial screen files by different companies make the automated analysis of crystallization experiments difficult and time-consuming. In order to reduce the human effort required to deal with \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Carroll:2020:SSA, author = "Thomas C. Carroll and Jude-Thaddeus Ojiaku and Prudence W. H. 
Wong", title = "Semiglobal Sequence Alignment with Gaps Using {GPU}", journal = j-TCBB, volume = "17", number = "6", pages = "2086--2097", month = nov, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2914105", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:36 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2914105", abstract = "In this paper, we consider the pair-wise semiglobal sequence alignment problem with gaps, which is motivated by the {\em re-sequencing\/} problem that requires to assemble short reads sequences into a genome sequence by referring to a reference \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Azuma:2020:SBA, author = "Shun-Ichi Azuma and Toshimitsu Kure and Toshiharu Sugie", title = "Structural Bistability Analysis of Flower-Shaped and Chain-Shaped {Boolean} Networks", journal = j-TCBB, volume = "17", number = "6", pages = "2098--2106", month = nov, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2917196", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:36 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2917196", abstract = "{\em Bistability\/}, i.e., the existence of just two stable equilibria, is known to play an important role in biological systems, e.g., cellular differentiation and apoptosis. 
In this paper, we consider the bistability but as a {\em structural\/} \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Rhodes:2020:TMT, author = "John A. Rhodes", title = "Topological Metrizations of Trees, and New Quartet Methods of Tree Inference", journal = j-TCBB, volume = "17", number = "6", pages = "2107--2118", month = nov, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2917204", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:36 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2917204", abstract = "Topological phylogenetic trees can be assigned edge weights in several natural ways, highlighting different aspects of the tree. Here, the rooted triple and quartet metrizations are introduced, and applied to formulate novel methods of inferring large \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2020:TSD, author = "Gui-Jun Zhang and Xiao-Qi Wang and Lai-Fa Ma and Liu-Jing Wang and Jun Hu and Xiao-Gen Zhou", title = "Two-Stage Distance Feature-based Optimization Algorithm for {{\em De novo\/}} Protein Structure Prediction", journal = j-TCBB, volume = "17", number = "6", pages = "2119--2130", month = nov, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2917452", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:36 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2917452", abstract = "De novo protein structure prediction can be treated as a conformational space optimization problem under the guidance of 
an energy function. However, it is a challenge of how to design an accurate energy function which ensures low-energy conformations \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ogunleye:2020:XMC, author = "Adeola Ogunleye and Qing-Guo Wang", title = "{XGBoost} Model for Chronic Kidney Disease Diagnosis", journal = j-TCBB, volume = "17", number = "6", pages = "2131--2140", month = nov, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2911071", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:36 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2911071", abstract = "Chronic Kidney Disease (CKD) is a menace that is affecting 10 percent of the world population and 15 percent of the South African population. The early and cheap diagnosis of this disease with accuracy and reliability will save 20,000 lives in South \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Manica:2020:FAA, author = "Matteo Manica and Raphael Polig and Mitra Purandare and Roland Mathis and Christoph Hagleitner and Mar{\'\i}a Rodr{\'\i}guez Mart{\'\i}nez", title = "{FPGA} Accelerated Analysis of {Boolean} Gene Regulatory Networks", journal = j-TCBB, volume = "17", number = "6", pages = "2141--2147", month = nov, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2936836", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:36 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2936836", abstract = "Boolean models are a powerful abstraction for qualitative modeling 
of gene regulatory networks. With the recent availability of advanced high-throughput technologies, Boolean models have increasingly grown in size and complexity, posing a challenge for \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Xiao:2020:CDW, author = "Ming Xiao and Xiangyu Yang and Jun Yu and Le Zhang", title = "{CGIDLA}: Developing the {Web} Server for {CpG Island} Related Density and {LAUPs} (Lineage-Associated Underrepresented Permutations) Study", journal = j-TCBB, volume = "17", number = "6", pages = "2148--2154", month = nov, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2935971", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:36 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2935971", abstract = "It is well known that CpG island plays an important role in gene methylation. Since CpG island is closely related to human genetic characteristics such as TATA-box, tissue expression specificity, and LAUPs (Lineage-associated Underrepresented Permutations). 
\ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Petti:2020:CSD, author = "Manuela Petti and Daniele Bizzarri and Antonella Verrienti and Rosa Falcone and Lorenzo Farina", title = "Connectivity Significance for Disease Gene Prioritization in an Expanding Universe", journal = j-TCBB, volume = "17", number = "6", pages = "2155--2161", month = nov, year = "2020", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2938512", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Feb 23 08:57:36 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2938512", abstract = "A fundamental topic in network medicine is disease genes prioritization. The underlying hypothesis is that disease genes are organized as modules confined within the interactome. Here, we propose a novel algorithm called DiaBLE \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Martin-Vide:2021:ACBa, author = "Carlos Mart{\'\i}n-Vide and Miguel A. 
Vega-Rodr{\'\i}guez", title = "{{\booktitle{Algorithms for Computational Biology}}}: Sixth Edition", journal = j-TCBB, volume = "18", number = "1", pages = "1--1", month = jan, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3023866", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Jun 15 14:32:53 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3023866", abstract = "The papers in this special section were presented at the Sixth International Conference on Algorithms for Computational Biology, AlCoB 2019, that was held in Berkeley on May 28--30, 2019.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Le:2021:UCS, author = "Thien Le and Aaron Sy and Erin K. Molloy and Qiuyi Zhang and Satish Rao and Tandy Warnow", title = "Using Constrained-{INC} for Large-Scale Gene Tree and Species Tree Estimation", journal = j-TCBB, volume = "18", number = "1", pages = "2--15", month = jan, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.2990867", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Jun 15 14:32:53 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.2990867", abstract = "Incremental tree building (INC) is a new phylogeny estimation method that has been proven to be absolute fast converging under standard sequence evolution models. 
A variant of INC, called Constrained-INC, is designed for use in divide-and-conquer \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Maharaj:2021:CNE, author = "Sridevi Maharaj and Taotao Qian and Zarin Ohiba and Wayne Hayes", title = "Common Neighbors Extension of the Sticky Model for {PPI} Networks Evaluated by Global and Local Graphlet Similarity", journal = j-TCBB, volume = "18", number = "1", pages = "16--26", month = jan, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3017374", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Jun 15 14:32:53 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3017374", abstract = "The structure of protein-protein interaction (PPI) networks has been studied for over a decade. Many theoretical models have been proposed to model PPI network structure, but continuing noise and incompleteness in these networks make conclusions about \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Xiao:2021:EIA, author = "Peng Xiao and Xingyu Cai and Sanguthevar Rajasekaran", title = "{EMS3}: an Improved Algorithm for Finding Edit-Distance Based Motifs", journal = j-TCBB, volume = "18", number = "1", pages = "27--37", month = jan, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3024222", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Jun 15 14:32:53 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3024222", abstract = "Discovering patterns in biological sequences is a crucial step to extract useful information from them. 
Motifs can be viewed as patterns that occur exactly or with minor changes across some or all of the biological sequences. Motif search has numerous \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Liu:2021:GET, author = "Liu Liu and Reza Zare and Shuihua Wang", title = "Guest Editorial: Transfer Learning Methods Used in Medical Imaging and Health Informatics", journal = j-TCBB, volume = "18", number = "1", pages = "38--39", month = jan, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3020460", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Jun 15 14:32:53 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3020460", abstract = "The eight papers in this special section focus on novel theories and methods using transfer learning proposed for medical imaging and health information processes.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Jiang:2021:NNT, author = "Yizhang Jiang and Xiaoqing Gu and Dongrui Wu and Wenlong Hang and Jing Xue and Shi Qiu and Chin-Teng Lin", title = "A Novel Negative-Transfer-Resistant Fuzzy Clustering Model With a Shared Cross-Domain Transfer Latent Space and its Application to Brain {CT} Image Segmentation", journal = j-TCBB, volume = "18", number = "1", pages = "40--52", month = jan, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2963873", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Jun 15 14:32:53 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2963873", abstract = "Traditional clustering algorithms for medical image 
segmentation can only achieve satisfactory clustering performance under relatively ideal conditions, in which there is adequate data from the same distribution, and the data is rarely disturbed by noise \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Xia:2021:CDC, author = "Kaijian Xia and TongGuang Ni and Hongsheng Yin and Bo Chen", title = "Cross-Domain Classification Model With Knowledge Utilization Maximization for Recognition of Epileptic {EEG} Signals", journal = j-TCBB, volume = "18", number = "1", pages = "53--61", month = jan, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.2973978", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Jun 15 14:32:53 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.2973978", abstract = "Conventional classification models for epileptic EEG signal recognition need sufficient labeled samples as training dataset. In addition, when training and testing EEG signal samples are collected from different distributions, for example, due to \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Huang:2021:DSN, author = "Chenxi Huang and Yisha Lan and Gaowei Xu and Xiaojun Zhai and Jipeng Wu and Fan Lin and Nianyin Zeng and Qingqi Hong and E. Y. K. 
Ng and Yonghong Peng and Fei Chen and Guokai Zhang", title = "A Deep Segmentation Network of Multi-Scale Feature Fusion Based on Attention Mechanism for {IVOCT} Lumen Contour", journal = j-TCBB, volume = "18", number = "1", pages = "62--69", month = jan, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.2973971", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Jun 15 14:32:53 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.2973971", abstract = "Recently, coronary heart disease has attracted more and more attention, where segmentation and analysis for vascular lumen contour are helpful for treatment. And intravascular optical coherence tomography (IVOCT) images are used to display lumen shapes in \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Qian:2021:TUM, author = "Pengjiang Qian and Jiamin Zheng and Qiankun Zheng and Yuan Liu and Tingyu Wang and Rose {Al Helo} and Atallah Baydoun and Norbert Avril and Rodney J. Ellis and Harry Friel and Melanie S. Traughber and Ajit Devaraj and Bryan Traughber and Raymond F. Muzic", title = "Transforming {UTE-mDixon MR} Abdomen--Pelvis Images Into {CT} by Jointly Leveraging Prior Knowledge and Partial Supervision", journal = j-TCBB, volume = "18", number = "1", pages = "70--82", month = jan, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.2979841", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Jun 15 14:32:53 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.2979841", abstract = "Computed tomography (CT) provides information for diagnosis, PET attenuation correction (AC), and radiation treatment planning (RTP). 
Disadvantages of CT include poor soft tissue contrast and exposure to ionizing radiation. While MRI can overcome these \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Singh:2021:IBC, author = "Rishav Singh and Tanveer Ahmed and Abhinav Kumar and Amit Kumar Singh and Anil Kumar Pandey and Sanjay Kumar Singh", title = "Imbalanced Breast Cancer Classification Using Transfer Learning", journal = j-TCBB, volume = "18", number = "1", pages = "83--93", month = jan, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.2980831", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Jun 15 14:32:53 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.2980831", abstract = "Accurate breast cancer detection using automated algorithms remains a problem within the literature. Although a plethora of work has tried to address this issue, an exact solution is yet to be found. This problem is further exacerbated by the fact that \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yu:2021:RSB, author = "Xiang Yu and Cheng Kang and David S. 
Guttery and Seifedine Kadry and Yang Chen and Yu-Dong Zhang", title = "{ResNet--SCDA--50} for Breast Abnormality Classification", journal = j-TCBB, volume = "18", number = "1", pages = "94--102", month = jan, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.2986544", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Jun 15 14:32:53 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.2986544", abstract = "Aim: Breast cancer is the most common cancer in women and the second most common cancer worldwide. With the rapid advancement of deep learning, the early stages of breast cancer development can be accurately detected by radiologists with the help of \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Chen:2021:TLB, author = "Jintai Chen and Haochao Ying and Xuechen Liu and Jingjing Gu and Ruiwei Feng and Tingting Chen and Honghao Gao and Jian Wu", title = "A Transfer Learning Based Super-Resolution Microscopy for Biopsy Slice Images: The Joint Methods Perspective", journal = j-TCBB, volume = "18", number = "1", pages = "103--113", month = jan, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.2991173", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Jun 15 14:32:53 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.2991173", abstract = "Higher-resolution biopsy slice images reveal many details, which are widely used in medical practice. However, taking high-resolution slice images is more costly than taking low-resolution ones. 
In this paper, we propose a joint framework containing a \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Jin:2021:LTD, author = "Yong Jin and Zhenjiang Qian and Shengrong Gong and Weiyong Yang", title = "Learning Transferable Driven and Drone Assisted Sustainable and Robust Regional Disease Surveillance for Smart Healthcare", journal = j-TCBB, volume = "18", number = "1", pages = "114--125", month = jan, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3017041", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Jun 15 14:32:53 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3017041", abstract = "Smart healthcare has been applied in many fields such as disease surveillance and telemedicine, etc. However, there are some challenges for device deployment, data collection and guarantee of sustainability in regional disease surveillance.
First, it is \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Lim:2021:NWI, author = "Hansaim Lim and Lei Xie", title = "A New Weighted Imputed Neighborhood-Regularized Tri-Factorization One-Class Collaborative Filtering Algorithm: Application to Target Gene Prediction of Transcription Factors", journal = j-TCBB, volume = "18", number = "1", pages = "126--137", month = jan, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.2968442", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Jun 15 14:32:53 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.2968442", abstract = "Identifying target genes of transcription factors (TFs) is crucial to understand transcriptional regulation. However, our understanding of genome-wide TF targeting profile is limited due to the cost of large-scale experiments and intrinsic complexity of \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Allen:2021:UFP, author = "Daniel R. Allen and Sharma V. 
Thankachan and Bojian Xu", title = "An Ultra-Fast and Parallelizable Algorithm for Finding $k$-Mismatch Shortest Unique Substrings", journal = j-TCBB, volume = "18", number = "1", pages = "138--148", month = jan, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.2968531", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Jun 15 14:32:53 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/string-matching.bib; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.2968531", abstract = "This paper revisits the $k$-mismatch shortest unique substring finding problem and demonstrates that a technique recently presented in the context of solving the $k$-mismatch average common substring problem can be adapted and combined with parts of the existing solution, resulting in a new algorithm which has expected time complexity of $ O(n \log^k n) $, while maintaining a practical space complexity at $ O(k n) $, where $n$ is the string length. When $ k > 0$, which is the hard case, our new proposal significantly improves the any-case $ O(n^2)$ time complexity of the prior best method for $k$-mismatch shortest unique substring finding. Experimental study shows that our new algorithm is practical to implement and demonstrates significant improvements in processing time compared to the prior best solution's implementation when $k$ is small relative to $n$. For example, our method processes a 200 KB sample DNA sequence with $ k = 1$ in just 0.18 seconds compared to 174.37 seconds with the prior best solution. Further, it is observed that significant portions of the adapted technique can be executed in parallel, using two different simple concurrency models, resulting in further significant practical performance improvement.
As an example, when using 8 cores, the parallel implementations both achieved processing times that are less than 1/4 of the serial implementation's time cost, when processing a 10 MB sample DNA sequence with $ k = 2$. In an age where instances with thousands of gigabytes of RAM are readily available for use through Cloud infrastructure providers, it is likely that the trade-off of additional memory usage for significantly improved processing times will be desirable and needed by many users. For example, the best prior solution may spend years to finish a DNA sample of 200 MB for any $ k > 0$, while this new proposal, using 24 cores, can finish processing a sample of this size with $ k = 1$ in 206.376 seconds with a peak memory usage of 46 GB, which is both easily available and affordable on Cloud. It is expected that this new efficient and practical algorithm for $k$-mismatch shortest unique substring finding will prove useful to those using the measure on long sequences in fields such as computational biology. We also give a theoretical bound that the $k$-mismatch shortest unique substring finding problem can be solved using $ O(n \log^k n)$ time and $ O(n)$ space, asymptotically much better than the one we implemented, serving as a new discovery of interest.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Tabaszewski:2021:CAS, author = "P. Tabaszewski and P. G{\'o}recki and A. Markin and T. Anderson and O.
Eulenstein", title = "Consensus of All Solutions for Intractable Phylogenetic Tree Inference", journal = j-TCBB, volume = "18", number = "1", pages = "149--161", month = jan, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2947051", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Jun 15 14:32:53 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2947051", abstract = "Solving median tree problems is a classic approach for inferring species trees from a collection of discordant gene trees. Median tree problems are typically NP-hard and dealt with by local search heuristics. Unfortunately, such heuristics generally lack \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Pattabiraman:2021:PHM, author = "Srilakshmi Pattabiraman and Tandy Warnow", title = "Profile Hidden {Markov} Models Are Not Identifiable", journal = j-TCBB, volume = "18", number = "1", pages = "162--172", month = jan, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2933821", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Jun 15 14:32:53 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2933821", abstract = "Profile Hidden Markov Models (HMMs) are graphical models that can be used to produce finite length sequences from a distribution. 
In fact, although they were only introduced for bioinformatics 25 years ago (by Haussler et al., Hawaii International \ldots{}).", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Guan:2021:MGS, author = "Jiaqi Guan and Runzhe Li and Sheng Yu and Xuegong Zhang", title = "A Method for Generating Synthetic Electronic Medical Record Text", journal = j-TCBB, volume = "18", number = "1", pages = "173--182", month = jan, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2948985", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Jun 15 14:32:53 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2948985", abstract = "Machine learning (ML) and Natural Language Processing (NLP) have achieved remarkable success in many fields and have brought new opportunities and high expectation in the analyses of medical data, of which the most common type is the massive free-text \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Qin:2021:OIR, author = "Ruiqi Qin and Lei Duan and Huiru Zheng and Jesse Li-Ling and Kaiwen Song and Yidan Zhang", title = "An Ontology-Independent Representation Learning for Similar Disease Detection Based on Multi-Layer Similarity Network", journal = j-TCBB, volume = "18", number = "1", pages = "183--193", month = jan, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2941475", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Jun 15 14:32:53 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2941475", abstract = "To identify similar diseases has significant 
implications for revealing the etiology and pathogenesis of diseases and further research in the domain of biomedicine. Currently, most methods for the measurement of disease similarity utilize either \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Long:2021:FDE, author = "Wei Long and Tiange Li and Yang Yang and Hong-Bin Shen", title = "{FlyIT}: \bioname{Drosophila} Embryogenesis Image Annotation based on Image Tiling and Convolutional Neural Networks", journal = j-TCBB, volume = "18", number = "1", pages = "194--204", month = jan, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2935723", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Jun 15 14:32:53 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2935723", abstract = "With the rise of image-based transcriptomics, spatial gene expression data has become increasingly important for understanding gene regulations from the tissue level down to the cell level. 
Especially, the gene expression images of Drosophila embryos \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Dai:2021:GAG, author = "Suyang Dai and Yuxia Ding and Zihan Zhang and Wenxuan Zuo and Xiaodi Huang and Shanfeng Zhu", title = "{GrantExtractor}: Accurate Grant Support Information Extraction from Biomedical Fulltext Based on {Bi-LSTM-CRF}", journal = j-TCBB, volume = "18", number = "1", pages = "205--215", month = jan, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2939128", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Jun 15 14:32:53 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2939128", abstract = "Grant support (GS) in the MEDLINE database refers to funding agencies and contract numbers. It is important for funding organizations to track their funding outcomes from the GS information. 
As such, how to accurately and automatically extract funding \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yang:2021:IMO, author = "Bo Yang and Yupei Zhang and Shanmin Pang and Xuequn Shang and Xueqing Zhao and Minghui Han", title = "Integrating Multi-Omic Data With Deep Subspace Fusion Clustering for Cancer Subtype Prediction", journal = j-TCBB, volume = "18", number = "1", pages = "216--226", month = jan, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2951413", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Jun 15 14:32:53 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2951413", abstract = "One type of cancer usually consists of several subtypes with distinct clinical implications, thus the cancer subtype prediction is an important task in disease diagnosis and therapy. Utilizing one type of data from molecular layers in biological system to \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Du:2021:MTS, author = "Lei Du and Kefei Liu and Xiaohui Yao and Shannon L. Risacher and Junwei Han and Andrew J. 
Saykin and Lei Guo and Li Shen", title = "Multi-Task Sparse Canonical Correlation Analysis with Application to Multi-Modal Brain Imaging Genetics", journal = j-TCBB, volume = "18", number = "1", pages = "227--239", month = jan, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2947428", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Jun 15 14:32:53 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2947428", abstract = "Brain imaging genetics studies the genetic basis of brain structures and functionalities via integrating genotypic data such as single nucleotide polymorphisms (SNPs) and imaging quantitative traits (QTs). In this area, both multi-task learning (MTL) and \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Gao:2021:PAM, author = "Jianliang Gao and Ling Tian and Tengfei Lv and Jianxin Wang and Bo Song and Xiaohua Hu", title = "{Protein2Vec}: Aligning Multiple {PPI} Networks with Representation Learning", journal = j-TCBB, volume = "18", number = "1", pages = "240--249", month = jan, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2937771", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Jun 15 14:32:53 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2937771", abstract = "Research of Protein-Protein Interaction (PPI) Network Alignment is playing an important role in understanding the crucial underlying biological knowledge such as functionally homologous proteins and conserved evolutionary pathways across different \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = 
"https://dl.acm.org/loi/tcbb", } @Article{Chalk:2021:CRI, author = "Cameron Chalk and Niels Kornerup and Wyatt Reeves and David Soloveichik", title = "Composable Rate-Independent Computation in Continuous Chemical Reaction Networks", journal = j-TCBB, volume = "18", number = "1", pages = "250--260", month = jan, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2952836", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Jun 15 14:32:53 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2952836", abstract = "Biological regulatory networks depend upon chemical interactions to process information. Engineering such molecular computing systems is a major challenge for synthetic biology and related fields. The chemical reaction network (CRN) model idealizes \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Dunn:2021:FAN, author = "Sara-Jane Dunn and Hillel Kugler and Boyan Yordanov", title = "Formal Analysis of Network Motifs Links Structure to Function in Biological Programs", journal = j-TCBB, volume = "18", number = "1", pages = "261--271", month = jan, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2948157", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Jun 15 14:32:53 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2948157", abstract = "A recurring set of small sub-networks have been identified as the building blocks of biological networks across diverse organisms. 
These network motifs are associated with certain dynamic behaviors and define key modules that are important for \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Bokes:2021:MBF, author = "Pavol Bokes and Michal Hojcka and Abhyudai Singh", title = "{MicroRNA} Based Feedforward Control of Intrinsic Gene Expression Noise", journal = j-TCBB, volume = "18", number = "1", pages = "272--282", month = jan, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2938502", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Jun 15 14:32:53 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2938502", abstract = "Intrinsic noise, which arises in gene expression at low copy numbers, can be controlled by diverse regulatory motifs, including feedforward loops. 
Here, we study an example of a feedforward control system based on the interaction between an mRNA molecule \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Breik:2021:PSI, author = "Keenan Breik and Cameron Chalk and David Doty and David Haley and David Soloveichik", title = "Programming Substrate-Independent Kinetic Barriers With Thermodynamic Binding Networks", journal = j-TCBB, volume = "18", number = "1", pages = "283--295", month = jan, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2959310", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Jun 15 14:32:53 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2959310", abstract = "Engineering molecular systems that exhibit complex behavior requires the design of kinetic barriers. For example, an effective catalytic pathway must have a large barrier when the catalyst is absent. 
While programming such energy barriers seems to require \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zeng:2021:DLF, author = "Min Zeng and Min Li and Zhihui Fei and Fang-Xiang Wu and Yaohang Li and Yi Pan and Jianxin Wang", title = "A Deep Learning Framework for Identifying Essential Proteins by Integrating Multiple Types of Biological Information", journal = j-TCBB, volume = "18", number = "1", pages = "296--305", month = jan, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2897679", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Jun 15 14:32:53 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2897679", abstract = "Computational methods including centrality and machine learning-based methods have been proposed to identify essential proteins for understanding the minimum requirements of the survival and evolution of a cell. 
In centrality methods, researchers are \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Mu:2021:CML, author = "Quanhua Mu and Jiguang Wang", title = "{CNAPE}: a Machine Learning Method for Copy Number Alteration Prediction from Gene Expression", journal = j-TCBB, volume = "18", number = "1", pages = "306--311", month = jan, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2944827", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Jun 15 14:32:53 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2944827", abstract = "Detection of DNA copy number alteration in cancer cells is critical to understanding cancer initiation and progression. Widely used methods, such as DNA arrays and genomic DNA sequencing, are relatively expensive and require DNA samples at a microgram \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Kitrungrotsakul:2021:CCB, author = "Titinunt Kitrungrotsakul and Xian-Hua Han and Yutaro Iwamoto and Satoko Takemoto and Hideo Yokota and Sari Ipponjima and Tomomi Nemoto and Wei Xiong and Yen-Wei Chen", title = "A Cascade of {2.5D CNN} and Bidirectional {CLSTM} Network for Mitotic Cell Detection in {$4$D} Microscopy Image", journal = j-TCBB, volume = "18", number = "2", pages = "396--404", month = mar, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2919015", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:16 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2919015", abstract = "Mitosis detection is one of the challenging steps
in biomedical imaging research, which can be used to observe the cell behavior. Most of the already existing methods that are applied in detecting mitosis usually contain many nonmitotic events (normal \ldots{})", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2021:FLN, author = "Wen Zhang and Zhishuai Li and Wenzheng Guo and Weitai Yang and Feng Huang", title = "A Fast Linear Neighborhood Similarity-Based Network Link Inference Method to Predict {MicroRNA}--Disease Associations", journal = j-TCBB, volume = "18", number = "2", pages = "405--415", month = mar, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2931546", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:16 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2931546", abstract = "Increasing evidences revealed that microRNAs (miRNAs) play critical roles in important biological processes. The identification of disease-related miRNAs is critical to understand the molecular mechanisms of human diseases. Most existing computational \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wang:2021:FAC, author = "Biing-Feng Wang and Krister M. 
Swenson", title = "A Faster Algorithm for Computing the Kernel of Maximum Agreement Subtrees", journal = j-TCBB, volume = "18", number = "2", pages = "416--430", month = mar, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2922955", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:16 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2922955", abstract = "The maximum agreement subtree method determines the consensus of a collection of phylogenetic trees by identifying maximum cardinality subsets of leaves for which all input trees agree. The trees induced by these maximum cardinality subsets are maximum \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Xu:2021:GRB, author = "Yunpei Xu and Hong-Dong Li and Yi Pan and Feng Luo and Fang-Xiang Wu and Jianxin Wang", title = "A Gene Rank Based Approach for Single Cell Similarity Assessment and Clustering", journal = j-TCBB, volume = "18", number = "2", pages = "431--442", month = mar, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2931582", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:16 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2931582", abstract = "Single-cell RNA sequencing (scRNA-seq) technology provides quantitative gene expression profiles at single-cell resolution. As a result, researchers have established new ways to explore cell population heterogeneity and genetic variability of cells. 
One \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Araghi:2021:HSA, author = "Sahar Araghi and Thanh Nguyen", title = "A Hybrid Supervised Approach to Human Population Identification Using Genomics Data", journal = j-TCBB, volume = "18", number = "2", pages = "443--454", month = mar, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2919501", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:16 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2919501", abstract = "Single nucleotide polymorphisms (SNPs) are one type of genetic variations and each SNP represents a difference in a single DNA building block, namely a nucleotide. Previous research demonstrated that SNPs can be used to identify the correct source \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Chou:2021:NBB, author = "Hsin-Hung Chou and Ching-Tien Hsu and Li-Hsuan Chen and Yue-Cheng Lin and Sun-Yuan Hsieh", title = "A Novel Branch-and-Bound Algorithm for the Protein Folding Problem in the {$3$D HP} Model", journal = j-TCBB, volume = "18", number = "2", pages = "455--462", month = mar, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2934102", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:16 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2934102", abstract = "The protein folding problem (PFP) is an important issue in bioinformatics and biochemical physics. 
One of the most widely studied models of protein folding is the hydrophobic-polar (HP) model introduced by Dill. The PFP in the three-dimensional (3D) \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Luo:2021:NDR, author = "Huimin Luo and Jianxin Wang and Cheng Yan and Min Li and Fang-Xiang Wu and Yi Pan", title = "A Novel Drug Repositioning Approach Based on Collaborative Metric Learning", journal = j-TCBB, volume = "18", number = "2", pages = "463--471", month = mar, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2926453", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:16 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2926453", abstract = "Computational drug repositioning, which is an efficient approach to find potential indications for drugs, has been used to increase the efficiency of drug development. 
The drug repositioning problem essentially is a top-K recommendation task that \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Sadeghi:2021:ARC, author = "Seyedeh Shaghayegh Sadeghi and Mohammad Reza Keyvanpour", title = "An Analytical Review of Computational Drug Repurposing", journal = j-TCBB, volume = "18", number = "2", pages = "472--488", month = mar, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2933825", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:16 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2933825", abstract = "Drug repurposing is a vital function in pharmaceutical fields and has gained popularity in recent years in both the pharmaceutical industry and research community. It refers to the process of discovering new uses and indications for existing or failed \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Banerjee:2021:EPG, author = "Anupam Banerjee and Kuntal Pal and Pralay Mitra", title = "An Evolutionary Profile Guided Greedy Parallel Replica-Exchange {Monte Carlo} Search Algorithm for Rapid Convergence in Protein Design", journal = j-TCBB, volume = "18", number = "2", pages = "489--499", month = mar, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2928809", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:16 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2928809", abstract = "Protein design, also known as the inverse protein folding problem, is the identification of a protein sequence that 
folds into a target protein structure. Protein design is proved as an NP-hard problem. While researchers are working on designing \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Gu:2021:IMT, author = "Wanrong Gu and Ziye Zhang and Xianfen Xie and Yichen He", title = "An Improved Muti-Task Learning Algorithm for Analyzing Cancer Survival Data", journal = j-TCBB, volume = "18", number = "2", pages = "500--511", month = mar, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2920770", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:16 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2920770", abstract = "Survival analysis is a popular branch of statistics. At present, many algorithms (like traditional multi-tasking learning model) cannot be applied well in practice because of censored data. Although using some model (like parametric regression model) can \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Chow:2021:PAA, author = "Kevin Chow and Aisharjya Sarkar and Rasha Elhesha and Pietro Cinaglia and Ahmet Ay and Tamer Kahveci", title = "\pkg{ANCA}: Alignment-Based Network Construction Algorithm", journal = j-TCBB, volume = "18", number = "2", pages = "512--524", month = mar, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2923620", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:16 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2923620", abstract = "Dynamic biological networks model changes in the network topology over time. 
However, often the topologies of these networks are not available at specific time points. Existing algorithms for studying dynamic networks often ignore this problem and focus \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Gonzalez:2021:BIL, author = "Miguel Gonz{\'a}lez and Cristina Guti{\'e}rrez and Rodrigo Mart{\'\i}nez", title = "{Bayesian} Inference in {Y}-Linked Two-Sex Branching Processes with Mutations: {ABC} Approach", journal = j-TCBB, volume = "18", number = "2", pages = "525--538", month = mar, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2921308", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:16 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2921308", abstract = "A Y-linked two-sex branching process with mutations and blind choice of males is a suitable model for analyzing the evolution of the number of carriers of a Y-linked allele and its mutations. 
Such a model considers a two-sex monogamous population in which \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yuan:2021:PCI, author = "Xiguo Yuan and Jiaao Yu and Jianing Xi and Liying Yang and Junliang Shang and Zhe Li and Junbo Duan", title = "\pkg{CNV\_IFTV}: an Isolation Forest and Total Variation-Based Detection of {CNVs} from Short-Read Sequencing Data", journal = j-TCBB, volume = "18", number = "2", pages = "539--549", month = mar, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2920889", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:16 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2920889", abstract = "Accurate detection of copy number variations (CNVs) from short-read sequencing data is challenging due to the uneven distribution of reads and the unbalanced amplitudes of gains and losses. 
The direct use of read depths to measure CNVs tends to limit \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Biswas:2021:CNM, author = "Sourav Biswas and Sumanta Ray and Sanghamitra Bandyopadhyay", title = "Colored Network {Motif} Analysis by Dynamic Programming Approach: an Application in Host Pathogen Interaction Network", journal = j-TCBB, volume = "18", number = "2", pages = "550--561", month = mar, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2923173", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:16 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2923173", abstract = "Network motifs are subgraphs of a network which are found with significantly higher frequency than that expected in similar random networks. Motifs are small building blocks of a network and they have emerged as a way to uncover topological properties of \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Kimmel:2021:DCR, author = "Jacob C. Kimmel and Andrew S. Brack and Wallace F. 
Marshall", title = "Deep Convolutional and Recurrent Neural Networks for Cell Motility Discrimination and Prediction", journal = j-TCBB, volume = "18", number = "2", pages = "562--574", month = mar, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2919307", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:16 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2919307", abstract = "Cells in culture display diverse motility behaviors that may reflect differences in cell state and function, providing motivation to discriminate between different motility behaviors. Current methods to do so rely upon manual feature engineering. However, \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Li:2021:DDL, author = "Min Li and Yake Wang and Ruiqing Zheng and Xinghua Shi and Yaohang Li and Fang-Xiang Wu and Jianxin Wang", title = "{DeepDSC}: a Deep Learning Method to Predict Drug Sensitivity of Cancer Cell Lines", journal = j-TCBB, volume = "18", number = "2", pages = "575--582", month = mar, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2919581", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:16 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2919581", abstract = "High-throughput screening technologies have provided a large amount of drug sensitivity data for a panel of cancer cell lines and hundreds of compounds. 
Computational approaches to analyzing these data can benefit anticancer therapeutics by identifying \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2021:IRP, author = "Jingrong Zhang and Zihao Wang and Zhiyong Liu and Fa Zhang", title = "Improve the Resolution and Parallel Performance of the Three-Dimensional Refine Algorithm in {RELION} Using {CUDA} and {MPI}", journal = j-TCBB, volume = "18", number = "2", pages = "583--595", month = mar, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2929171", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:16 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2929171", abstract = "In cryo-electron microscopy, RELION is a powerful tool for high-resolution reconstruction. Due to the complicated imaging procedure and the heterogeneity of particles, some of the selected particle images offer more disturbing information than others. 
\ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Awais:2021:PIP, author = "Muhammad Awais and Waqar Hussain and Yaser Daanial Khan and Nouman Rasool and Sher Afzal Khan and Kuo-Chen Chou", title = "\pkg{iPhosH-PseAAC}: Identify Phosphohistidine Sites in Proteins by Blending Statistical Moments and Position Relative Features According to the {Chou}'s 5-Step Rule and General Pseudo Amino Acid Composition", journal = j-TCBB, volume = "18", number = "2", pages = "596--610", month = mar, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2919025", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:16 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2919025", abstract = "Protein phosphorylation is one of the key mechanism in prokaryotes and eukaryotes and is responsible for various biological functions such as protein degradation, intracellular localization, the multitude of cellular processes, molecular association, \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yan:2021:PMP, author = "Cheng Yan and Guihua Duan and Fang-Xiang Wu and Yi Pan and Jianxin Wang", title = "\pkg{MCHMDA}: Predicting Microbe-Disease Associations Based on Similarities and Low-Rank Matrix Completion", journal = j-TCBB, volume = "18", number = "2", pages = "611--620", month = mar, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2926716", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:16 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2926716", abstract = 
"With the development of high-through sequencing technology and microbiology, many studies have evidenced that microbes are associated with human diseases, such as obesity, liver cancer, and so on. Therefore, identifying the association between microbes \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Peng:2021:PMM, author = "Cheng Peng and Xinyu Wu and Wen Yuan and Xinran Zhang and Yu Zhang and Ying Li", title = "\pkg{MGRFE}: Multilayer Recursive Feature Elimination Based on an Embedded Genetic Algorithm for Cancer Classification", journal = j-TCBB, volume = "18", number = "2", pages = "621--632", month = mar, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2921961", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:16 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2921961", abstract = "Microarray gene expression data have become a topic of great interest for cancer classification and for further research in the field of bioinformatics. Nonetheless, due to the ``large $p$ \ldots{}''", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Gondeau:2021:OWN, author = "Alexandre Gondeau and Zahia Aouabed and Mohamed Hijri and Pedro R. 
Peres-Neto and Vladimir Makarenkov", title = "Object Weighting: a New Clustering Approach to Deal with Outliers and Cluster Overlap in Computational Biology", journal = j-TCBB, volume = "18", number = "2", pages = "633--643", month = mar, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2921577", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:16 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2921577", abstract = "Considerable efforts have been made over the last decades to improve the robustness of clustering algorithms against noise features and outliers, known to be important sources of error in clustering. Outliers dominate the sum-of-the-squares calculations \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Karbalayghareh:2021:OBT, author = "Alireza Karbalayghareh and Xiaoning Qian and Edward R. Dougherty", title = "Optimal {Bayesian} Transfer Learning for Count Data", journal = j-TCBB, volume = "18", number = "2", pages = "644--655", month = mar, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2920981", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:16 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2920981", abstract = "There is often a limited amount of omics data to design predictive models in biomedicine. 
Knowing that these omics data come from underlying processes that may share common pathways and disease mechanisms, it may be beneficial for designing a more \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Lopez-Lopera:2021:PIG, author = "Andr{\'e}s F. L{\'o}pez-Lopera and Nicolas Durrande and Mauricio A. {\'A}lvarez", title = "Physically-Inspired {Gaussian} Process Models for Post-Transcriptional Regulation in \bioname{Drosophila}", journal = j-TCBB, volume = "18", number = "2", pages = "656--666", month = mar, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2918774", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:16 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2918774", abstract = "The regulatory process of Drosophila is thoroughly studied for understanding a great variety of biological principles. 
While pattern-forming gene networks are analyzed in the transcription step, post-transcriptional events (e.g., translation, protein \ldots{})", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2021:PVT, author = "Qinhu Zhang and Zhen Shen and De-Shuang Huang", title = "Predicting {\em in-vitro\/} Transcription Factor Binding Sites Using {DNA} Sequence $+$ Shape", journal = j-TCBB, volume = "18", number = "2", pages = "667--676", month = mar, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2947461", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:16 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2947461", abstract = "Discovery of transcription factor binding sites (TFBSs) is essential for understanding the underlying binding mechanisms and cellular functions. Recently, Convolutional neural network (CNN) has succeeded in predicting TFBSs from the primary DNA sequences. 
\ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Qiu:2021:PAR, author = "Jingxuan Qiu and Tianyi Qiu and Qingli Dong and Dongpo Xu and Xiang Wang and Qi Zhang and Jing Pan and Qing Liu", title = "Predicting the Antigenic Relationship of Foot-and-Mouth Disease Virus for Vaccine Selection Through a Computational Model", journal = j-TCBB, volume = "18", number = "2", pages = "677--685", month = mar, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2923396", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:16 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2923396", abstract = "Foot-and-mouth disease virus (FMDV) is an antigenic-variable RNA virus that is responsible for the recurrence of foot-and-mouth disease in livestock and can be prevented and controlled using a vaccine with broad-spectrum protection. Current anti-genicity \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2021:PRB, author = "Chen Zhang and Yanrui Ding", title = "Probing the Relation Between Community Evolution in Dynamic Residue Interaction Networks and Xylanase Thermostability", journal = j-TCBB, volume = "18", number = "2", pages = "686--696", month = mar, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2922906", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:16 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2922906", abstract = "Residue-residue interactions are the basis of protein thermostability. 
The molecular conformations of \bioname{Streptomyces lividans} xylanase (xyna\_strli) and \bioname{Thermoascus aurantiacus} xylanase (xyna\_theau) at 300K, 325K and 350K \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2021:PSP, author = "Gui-Jun Zhang and Teng-Yu Xie and Xiao-Gen Zhou and Liu-Jing Wang and Jun Hu", title = "Protein Structure Prediction Using Population-Based Algorithm Guided by Information Entropy", journal = j-TCBB, volume = "18", number = "2", pages = "697--707", month = mar, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2921958", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:16 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2921958", abstract = "Ab initio protein structure prediction is one of the most challenging problems in computational biology. Multistage algorithms are widely used in ab initio protein structure prediction. 
The different computational costs of a multistage algorithm for \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Biswas:2021:RPC, author = "Saikat Biswas and Pabitra Mitra and Krothapalli Sreenivasa Rao", title = "Relation Prediction of Co-Morbid Diseases Using Knowledge Graph Completion", journal = j-TCBB, volume = "18", number = "2", pages = "708--717", month = mar, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2927310", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:16 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2927310", abstract = "Co-morbid disease condition refers to the simultaneous presence of one or more diseases along with the primary disease. A patient suffering from co-morbid diseases possess more mortality risk than with a disease alone. So, it is necessary to predict co-. \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Song:2021:SDS, author = "Xiaona Song and Mi Wang and Shuai Song and Choon Ki Ahn", title = "Sampled-Data State Estimation of Reaction Diffusion Genetic Regulatory Networks via Space-Dividing Approaches", journal = j-TCBB, volume = "18", number = "2", pages = "718--730", month = mar, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2919532", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:16 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2919532", abstract = "A novel state estimator is designed for genetic regulatory networks with reaction-diffusion terms in this study. 
First, the diffusion space (where mRNA and protein exist) is divided into several parts and only a point, a line, or a plane, etc., is \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Guo:2021:SNG, author = "Guimu Guo and Hongzhi Chen and Da Yan and James Cheng and Jake Y. Chen and Zechen Chong", title = "Scalable {De Novo} Genome Assembly Using a Pregel-Like Graph-Parallel System", journal = j-TCBB, volume = "18", number = "2", pages = "731--744", month = mar, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2920912", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:16 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2920912", abstract = "De novo genome assembly is the process of stitching short DNA sequences to generate longer DNA sequences, without using any reference sequence for alignment. It enables high-throughput genome sequencing and thus accelerates the discovery of new genomes. \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yan:2021:GES, author = "Da Yan and Sharma Thankachan and Jake Y. 
Chen", title = "Guest Editorial for Selected Papers From {BIOKDD 2019}", journal = j-TCBB, volume = "18", number = "3", pages = "809--810", month = may # "\slash " # jun, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3067071", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3067071", abstract = "The papers in this special section were presented at the 18th International Workshop on Data Mining in Bioinformatics (BIOKDD), held in conjunction with the ACM SIGKDD International Conference on Knowledge Discovery and Data Mining that was held on August \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Raghu:2021:PIT, author = "Vineet K. Raghu and Xiaoyu Ge and Arun Balajiee and Daniel J. Shirer and Isha Das and Panayiotis V. Benos and Panos K. Chrysanthis", title = "A Pipeline for Integrated Theory and Data-Driven Modeling of Biomedical Data", journal = j-TCBB, volume = "18", number = "3", pages = "811--822", month = may # "\slash " # jun, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3019237", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3019237", abstract = "Genome sequencing technologies have the potential to transform clinical decision making and biomedical research by enabling high-throughput measurements of the genome at a granular level. 
However, to truly understand mechanisms of disease and predict the \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Jiang:2021:BKG, author = "Tianwen Jiang and Qingkai Zeng and Tong Zhao and Bing Qin and Ting Liu and Nitesh V. Chawla and Meng Jiang", title = "Biomedical Knowledge Graphs Construction From Conditional Statements", journal = j-TCBB, volume = "18", number = "3", pages = "823--835", month = may # "\slash " # jun, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.2979959", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.2979959", abstract = "Conditions play an essential role in biomedical statements. However, existing biomedical knowledge graphs (BioKGs) only focus on factual knowledge, organized as a flat relational network of biomedical concepts. These BioKGs ignore the conditions of the \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Sreedharan:2021:RPE, author = "Jithin K. 
Sreedharan and Krzysztof Turowski and Wojciech Szpankowski", title = "Revisiting Parameter Estimation in Biological Networks: Influence of Symmetries", journal = j-TCBB, volume = "18", number = "3", pages = "836--849", month = may # "\slash " # jun, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.2980260", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.2980260", abstract = "Graph models often give us a deeper understanding of real-world networks. In the case of biological networks they help in predicting the evolution and history of biomolecule interactions, provided we map properly real networks into the corresponding graph \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yan:2021:GEM, author = "Ke Yan and Zhiwei Ji and Qun Jin and Qing-Guo Wang", title = "Guest Editorial: Machine Learning for {AI}-Enhanced Healthcare and Medical Services: New Development and Promising Solution", journal = j-TCBB, volume = "18", number = "3", pages = "850--851", month = may # "\slash " # jun, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3050935", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3050935", abstract = "The papers in this special section focus on machine learning for artificial intelligent-enhances healthcare and medical services. 
These services are always among the top concerns for humans, especially under the special situation of COVID-19 pandemic, \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Lian:2021:GLE, author = "Sheng Lian and Lei Li and Guiren Lian and Xiao Xiao and Zhiming Luo and Shaozi Li", title = "A Global and Local Enhanced Residual {U-Net} for Accurate Retinal Vessel Segmentation", journal = j-TCBB, volume = "18", number = "3", pages = "852--862", month = may # "\slash " # jun, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2917188", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2917188", abstract = "Retinal vessel segmentation is a critical procedure towards the accurate visualization, diagnosis, early treatment, and surgery planning of ocular diseases. 
Recent deep learning-based approaches have achieved impressive performance in retinal vessel \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Lu:2021:HEA, author = "Huijuan Lu and Huiyun Gao and Minchao Ye and Xiuhui Wang", title = "A Hybrid Ensemble Algorithm Combining {AdaBoost} and Genetic Algorithm for Cancer Classification with Gene Expression Data", journal = j-TCBB, volume = "18", number = "3", pages = "863--870", month = may # "\slash " # jun, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2952102", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2952102", abstract = "The diversity of base classifiers and integration of multiple classifiers are two key issues in the field of ensemble learning. 
This paper puts forward a hybrid ensemble algorithm combining AdaBoost and genetic algorithm (GA) for cancer classification with \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yan:2021:MIP, author = "Xiaodan Yan and Baojiang Cui and Yang Xu and Peilin Shi and Ziqi Wang", title = "A Method of Information Protection for Collaborative Deep Learning under {GAN} Model Attack", journal = j-TCBB, volume = "18", number = "3", pages = "871--881", month = may # "\slash " # jun, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2940583", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2940583", abstract = "Deep learning is widely used in the medical field owing to its high accuracy in medical image classification and biological applications. However, under collaborative deep learning, there is a serious risk of information leakage based on the deep \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2021:USC, author = "Qingchen Zhang and Changchuan Bai and Laurence T.
Yang and Zhikui Chen and Peng Li and Hang Yu", title = "A Unified Smart {Chinese} Medicine Framework for Healthcare and Medical Services", journal = j-TCBB, volume = "18", number = "3", pages = "882--890", month = may # "\slash " # jun, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2914447", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2914447", abstract = "Smart Chinese medicine has emerged to contribute to the evolution of healthcare and medical services by applying machine learning together with advanced computing techniques like cloud computing to computer-aided diagnosis and treatment in the health \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Chen:2021:KPD, author = "Dehua Chen and Meihua Huang and Weimin Li", title = "Knowledge-Powered Deep Breast Tumor Classification With Multiple Medical Reports", journal = j-TCBB, volume = "18", number = "3", pages = "891--901", month = may # "\slash " # jun, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2955484", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2955484", abstract = "Breast tumor classification with multiple medical reports such as B-ultrasound, Mammography (X-ray) and Nuclear Magnetic Resonance Imaging (MRI) is crucial to the intelligent cancer diagnosis system. 
Unlike the other domain texts, the medical reports have \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Chen:2021:CCB, author = "Yiyuan Chen and Yufeng Wang and Liang Cao and Qun Jin", title = "{CCFS}: a Confidence-Based Cost-Effective Feature Selection Scheme for Healthcare Data Classification", journal = j-TCBB, volume = "18", number = "3", pages = "902--911", month = may # "\slash " # jun, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2903804", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2903804", abstract = "Feature selection (FS) is one of the fundamental data processing techniques in various machine learning algorithms, especially for classification of healthcare data. However, it is a challenging issue due to the large search space. 
Binary Particle Swarm \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhou:2021:CRB, author = "Xiaokang Zhou and Yue Li and Wei Liang", title = "{CNN-RNN} Based Intelligent Recommendation for Online Medical Pre-Diagnosis Support", journal = j-TCBB, volume = "18", number = "3", pages = "912--921", month = may # "\slash " # jun, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.2994780", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.2994780", abstract = "The rapidly developed Health 2.0 technology has provided people with more opportunities to conduct online medical consultation than ever before. Understanding contexts within different online medical communications and activities becomes a significant \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yan:2021:DED, author = "Lu Yan and Weihong Huang and Liming Wang and Song Feng and Yonghong Peng and Jie Peng", title = "Data-Enabled Digestive Medicine: a New Big Data Analytics Platform", journal = j-TCBB, volume = "18", number = "3", pages = "922--931", month = may # "\slash " # jun, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2951555", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2951555", abstract = "This paper presents a big data analytics platform for clinical research and practice in the Gastroenterology Department of Xiangya Hospital at Central South 
University in China. This platform features a comprehensive and systematic support of big data in \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Souza:2021:DCI, author = "Mauricio Guevara Souza and Edgar E. Vallejo and Karol Estrada", title = "Detecting Clustered Independent Rare Variant Associations Using Genetic Algorithms", journal = j-TCBB, volume = "18", number = "3", pages = "932--939", month = may # "\slash " # jun, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2930505", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2930505", abstract = "The availability of an increasing collection of sequencing data provides the opportunity to study genetic variation with an unprecedented level of detail. There is much interest in uncovering the role of rare variants and their contribution to disease. 
\ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhou:2021:DUD, author = "Yongjin Zhou and Weijian Huang and Pei Dong and Yong Xia and Shanshan Wang", title = "{D-UNet}: a Dimension-Fusion {U} Shape Network for Chronic Stroke Lesion Segmentation", journal = j-TCBB, volume = "18", number = "3", pages = "940--950", month = may # "\slash " # jun, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2939522", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2939522", abstract = "Assessing the location and extent of lesions caused by chronic stroke is critical for medical diagnosis, surgical planning, and prognosis. In recent years, with the rapid development of 2D and 3D convolutional neural networks (CNN), the encoder-decoder \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wang:2021:EDP, author = "Yaqi Wang and Lingling Sun and Qun Jin", title = "Enhanced Diagnosis of Pneumothorax with an Improved Real-Time Augmentation for Imbalanced Chest {X}-rays Data Based on {DCNN}", journal = j-TCBB, volume = "18", number = "3", pages = "951--962", month = may # "\slash " # jun, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2911947", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2911947", abstract = "Pneumothorax is a common pulmonary disease that can lead to dyspnea and can be life-threatening. 
X-ray examination is the main means to diagnose this disease. Computer-aided diagnosis of pneumothorax on chest X-ray, as a prerequisite for a timely cure, \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wang:2021:HGR, author = "Xiuhui Wang and Shiling Feng and Wei Qi Yan", title = "Human Gait Recognition Based on Self-Adaptive Hidden {Markov} Model", journal = j-TCBB, volume = "18", number = "3", pages = "963--972", month = may # "\slash " # jun, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2951146", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2951146", abstract = "Human gait recognition has numerous challenges due to view angle changing, human dressing, bag carrying, and pedestrian walking speed, etc. In order to increase gait recognition accuracy under these circumstances, in this paper we propose a method for \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Pal:2021:IDR, author = "Jayanta Kumar Pal and Shubhra Sankar Ray and Sankar K. 
Pal", title = "Identifying Drug Resistant {miRNAs} Using Entropy Based Ranking", journal = j-TCBB, volume = "18", number = "3", pages = "973--984", month = may # "\slash " # jun, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2933205", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2933205", abstract = "MicroRNAs play an important role in controlling drug sensitivity and resistance in cancer. Identification of responsible miRNAs for drug resistance can enhance the effectiveness of treatment. A new set theoretic entropy measure (SPEM) is defined to \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wang:2021:IDP, author = "Bing Wang and Changqing Mei and Yuanyuan Wang and Yuming Zhou and Mu-Tian Cheng and Chun-Hou Zheng and Lei Wang and Jun Zhang and Peng Chen and Yan Xiong", title = "Imbalance Data Processing Strategy for Protein Interaction Sites Prediction", journal = j-TCBB, volume = "18", number = "3", pages = "985--994", month = may # "\slash " # jun, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2953908", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2953908", abstract = "Protein-protein interactions play essential roles in various biological progresses. Identifying protein interaction sites can facilitate researchers to understand life activities and therefore will be helpful for drug design. 
However, the number of \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Jiang:2021:MBR, author = "Xiran Jiang and Jiaxin Li and Yangyang Kan and Tao Yu and Shijie Chang and Xianzheng Sha and Hairong Zheng and Yahong Luo and Shanshan Wang", title = "{MRI} Based Radiomics Approach With Deep Learning for Prediction of Vessel Invasion in Early-Stage Cervical Cancer", journal = j-TCBB, volume = "18", number = "3", pages = "995--1002", month = may # "\slash " # jun, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2963867", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2963867", abstract = "This article aims to build deep learning-based radiomic methods in differentiating vessel invasion from non-vessel invasion in cervical cancer with multi-parametric MRI data. 
A set of 1,070 dynamic T1 contrast-enhanced (DCE-T1) and 986 T2 weighted imaging \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Li:2021:MVM, author = "Cheng Li and Jingxu Xu and Qiegen Liu and Yongjin Zhou and Lisha Mou and Zuhui Pu and Yong Xia and Hairong Zheng and Shanshan Wang", title = "Multi-View Mammographic Density Classification by Dilated and Attention-Guided Residual Learning", journal = j-TCBB, volume = "18", number = "3", pages = "1003--1013", month = may # "\slash " # jun, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.2970713", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.2970713", abstract = "Breast density is widely adopted to reflect the likelihood of early breast cancer development. Existing methods of mammographic density classification either require steps of manual operations or achieve only moderate classification accuracy due to the \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Cakmak:2021:PMA, author = "Ali Cakmak and M. 
Hasan Celik", title = "Personalized Metabolic Analysis of Diseases", journal = j-TCBB, volume = "18", number = "3", pages = "1014--1025", month = may # "\slash " # jun, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3008196", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3008196", abstract = "The metabolic wiring of patient cells is altered drastically in many diseases, including cancer. Understanding the nature of such changes may pave the way for new therapeutic opportunities as well as the development of personalized treatment strategies \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wang:2021:PPG, author = "Wenyan Wang and Yuming Zhou and Mu-Tian Cheng and Yan Wang and Chun-Hou Zheng and Yan Xiong and Peng Chen and Zhiwei Ji and Bing Wang", title = "Potential Pathogenic Genes Prioritization Based on Protein Domain Interaction Network Analysis", journal = j-TCBB, volume = "18", number = "3", pages = "1026--1034", month = may # "\slash " # jun, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.2983894", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.2983894", abstract = "Pathogenicity-related studies are of great importance in understanding the pathogenesis of complex diseases and improving the level of clinical medicine. 
This work proposed a bioinformatics scheme to analyze cancer-related gene mutations, and try to \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Bouasker:2021:PAB, author = "Souad Bouasker and Wissem Inoubli and Sadok {Ben Yahia} and Gayo Diallo", title = "Pregnancy Associated Breast Cancer Gene Expressions: New Insights on Their Regulation Based on Rare Correlated Patterns", journal = j-TCBB, volume = "18", number = "3", pages = "1035--1048", month = may # "\slash " # jun, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3015236", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3015236", abstract = "Breast-cancer (BC) is the most common invasive cancer in women, with considerable death. 
Given that, BC is classified as a hormone-dependent cancer, when it collides with pregnancy, different questions may arise for which there are still no convincing \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Li:2021:NAP, author = "Jiechen Li and Haochen Zhao and Zhanwei Xuan and Jingwen Yu and Xiang Feng and Bo Liao and Lei Wang", title = "A Novel Approach for Potential Human {LncRNA}-Disease Association Prediction Based on Local Random Walk", journal = j-TCBB, volume = "18", number = "3", pages = "1049--1059", month = may # "\slash " # jun, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2934958", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2934958", abstract = "In recent years, lncRNAs (long non-coding RNAs) have been proved to be closely related to many diseases that are seriously harmful to human health. 
Although researches on clarifying the relationships between lncRNAs and diseases are developing rapidly, \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Liu:2021:DLG, author = "Min Liu and Yalan Liu and Weili Qian and Yaonan Wang", title = "{DeepSeed} Local Graph Matching for Densely Packed Cells Tracking", journal = j-TCBB, volume = "18", number = "3", pages = "1060--1069", month = may # "\slash " # jun, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2936851", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2936851", abstract = "The tracking of densely packed plant cells across microscopy image sequences is very challenging, because their appearance change greatly over time. 
A local graph matching algorithm was proposed to track such cells by exploiting the tight spatial topology \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2021:DDM, author = "Zhen Zhang and Junwei Luo and Juan Shang and Min Li and Fang-Xiang Wu and Yi Pan and Jianxin Wang", title = "Deletion Detection Method Using the Distribution of Insert Size and a Precise Alignment Strategy", journal = j-TCBB, volume = "18", number = "3", pages = "1070--1081", month = may # "\slash " # jun, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2934407", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2934407", abstract = "Homozygous and heterozygous deletions commonly exist in the human genome. For current structural variation detection tools, it is significant to determine whether a deletion is homozygous or heterozygous. However, the problems of sequencing errors, micro-
\ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yu:2021:DBF, author = "Tingting Yu and Jianxing Liu and Qingshuang Zeng and Ligang Wu", title = "Dissipativity-Based Filtering for Switched Genetic Regulatory Networks with Stochastic Disturbances and Time-Varying Delays", journal = j-TCBB, volume = "18", number = "3", pages = "1082--1092", month = may # "\slash " # jun, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2936351", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2936351", abstract = "This paper deals with the problem of dissipativity-based filtering for switched genetic regulatory networks (GRNs) with stochastic perturbation and time-varying delays. By choosing an appropriate piecewise Lyapunov function and using the average dwell \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{An:2021:HRP, author = "Ying An and Nengjun Huang and Xianlai Chen and Fangxiang Wu and Jianxin Wang", title = "High-Risk Prediction of Cardiovascular Diseases via Attention-Based Deep Neural Networks", journal = j-TCBB, volume = "18", number = "3", pages = "1093--1105", month = may # "\slash " # jun, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2935059", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2935059", abstract = "High-risk prediction of cardiovascular disease is of great significance and impendency in medical fields 
with the increasing phenomenon of sub-health these years. Most existing pathological methods for the prognosis prediction are either costly or prone \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Chen:2021:IIA, author = "Qingfeng Chen and Dehuan Lai and Wei Lan and Ximin Wu and Baoshan Chen and Jin Liu and Yi-Ping Phoebe Chen and Jianxin Wang", title = "{ILDMSF}: Inferring Associations Between Long Non-Coding {RNA} and Disease Based on Multi-Similarity Fusion", journal = j-TCBB, volume = "18", number = "3", pages = "1106--1112", month = may # "\slash " # jun, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2936476", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:52 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2936476", abstract = "The dysregulation and mutation of long non-coding RNAs (lncRNAs) have been proved to result in a variety of human diseases. Identifying potential disease-related lncRNAs may benefit disease diagnosis, treatment and prognosis. 
A number of methods have been \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wei:2021:ECG, author = "Dong-Qing Wei and Aman Chandra Kaushik and Gurudeeban Selvaraj and Yi Pan", title = "Editorial: Computational Genomics and Molecular Medicine for Emerging {COVID-19}", journal = j-TCBB, volume = "18", number = "4", pages = "1227--1229", month = jul # "\slash " # aug, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3088319", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:54 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3088319", abstract = "The papers in this special section focus on computational genomics and molecular medicine for emerging COVID-19. In 2020, World Health Organization announced Coronavirus disease (COVID)-19 is a pandemic disease, which is devastated the socio-economic life \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Cui:2021:IAI, author = "Chunmei Cui and Chuanbo Huang and Wanlu Zhou and Xiangwen Ji and Fenghong Zhang and Liang Wang and Yuan Zhou and Qinghua Cui", title = "\gene{AGTR2}, One Possible Novel Key Gene for the Entry of {SARS-CoV-2} Into Human Cells", journal = j-TCBB, volume = "18", number = "4", pages = "1230--1233", month = jul # "\slash " # aug, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3009099", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:54 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3009099", abstract = "Recently, it was confirmed that
\gene{ACE2} is the receptor of SARS-CoV-2, the pathogen causing the recent outbreak of severe pneumonia around the world. It is confused that \gene{ACE2} is widely expressed across a variety of organs \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Pathak:2021:DBC, author = "Yadunath Pathak and Piyush Kumar Shukla and K. V. Arya", title = "Deep Bidirectional Classification Model for {COVID-19} Disease Infected Patients", journal = j-TCBB, volume = "18", number = "4", pages = "1234--1241", month = jul # "\slash " # aug, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3009859", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:54 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3009859", abstract = "In December of 2019, a novel coronavirus (COVID-19) appeared in Wuhan city, China and has been reported in many countries with millions of people infected within only four months.
Chest computed Tomography (CT) has proven to be a useful supplement to \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Lv:2021:DPR, author = "Jinxiong Lv and Shikui Tu and Lei Xu", title = "Detection of Phenotype-Related Mutations of {COVID-19} via the Whole Genomic Data", journal = j-TCBB, volume = "18", number = "4", pages = "1242--1249", month = jul # "\slash " # aug, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3049836", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:54 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3049836", abstract = "The coronavirus disease 2019 (COVID-19) epidemic continues to spread rapidly around the world and nearly 20 millions people are infected. This paper utilises both single-locus analysis and joint-SNPs analysis for detection of significant single nucleotide \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Xiao:2021:DWS, author = "Ming Xiao and Guangdi Liu and Jianghang Xie and Zichun Dai and Zihao Wei and Ziyao Ren and Jun Yu and Le Zhang", title = "{2019nCoVAS}: Developing the Web Service for Epidemic Transmission Prediction, Genome Analysis, and Psychological Stress Assessment for {2019-nCoV}", journal = j-TCBB, volume = "18", number = "4", pages = "1250--1261", month = jul # "\slash " # aug, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3049617", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:54 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3049617", abstract = "Since 
the COVID-19 epidemic is still expanding around the world and poses a serious threat to human life and health, it is necessary for us to carry out epidemic transmission prediction, whole genome sequence analysis, and public psychological stress \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Selvaraj:2021:HTS, author = "Chandrabose Selvaraj and Dhurvas Chandrasekaran Dinesh and Umesh Panwar and Evzen Boura and Sanjeev Kumar Singh", title = "High-Throughput Screening and Quantum Mechanics for Identifying Potent Inhibitors Against {Mac1} Domain of {SARS-CoV-2} {Nsp3}", journal = j-TCBB, volume = "18", number = "4", pages = "1262--1270", month = jul # "\slash " # aug, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3037136", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:54 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3037136", abstract = "SARS-CoV-2 encodes the Mac1 domain within the large nonstructural protein 3 (Nsp3), which has an ADP-ribosylhydrolase activity conserved in other coronaviruses. 
The enzymatic activity of Mac1 makes it an essential virulence factor for the pathogenicity of \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Chakrabarty:2021:NBA, author = "Broto Chakrabarty and Dibyajyoti Das and Gopalakrishnan Bulusu and Arijit Roy", title = "Network-Based Analysis of Fatal Comorbidities of {COVID-19} and Potential Therapeutics", journal = j-TCBB, volume = "18", number = "4", pages = "1271--1280", month = jul # "\slash " # aug, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3075299", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:54 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3075299", abstract = "COVID-19 is a highly contagious disease caused by the severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2). The case-fatality rate is significantly higher in older patients and those with diabetes, cancer or cardiovascular disorders. The human \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Jokinen:2021:DBS, author = "Elmeri M. Jokinen and Krishnasamy Gopinath and Sami T. Kurkinen and Olli T. 
Pentik{\"a}inen", title = "Detection of Binding Sites on {SARS-CoV-2} Spike Protein Receptor-Binding Domain by Molecular Dynamics Simulations in Mixed Solvents", journal = j-TCBB, volume = "18", number = "4", pages = "1281--1289", month = jul # "\slash " # aug, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3076259", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:54 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3076259", abstract = "The novel SARS-CoV-2 uses ACE2 (Angiotensin-Converting Enzyme 2) receptor as an entry point. Insights on S protein receptor-binding domain (RBD) interaction with ACE2 receptor and drug repurposing has accelerated drug discovery for the novel SARS-CoV-2 \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhou:2021:LDS, author = "Deshan Zhou and Shaoliang Peng and Dong-Qing Wei and Wu Zhong and Yutao Dou and Xiaolan Xie", title = "{LUNAR}: Drug Screening for Novel Coronavirus Based on Representation Learning Graph Convolutional Network", journal = j-TCBB, volume = "18", number = "4", pages = "1290--1298", month = jul # "\slash " # aug, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3085972", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:54 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3085972", abstract = "An outbreak of COVID-19 that began in late 2019 was caused by a novel coronavirus (SARS-CoV-2). It has become a global pandemic. As of June 9, 2020, it has infected nearly 7 million people and killed more than 400,000, but there is no specific drug.
\ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Kaushik:2021:CAC, author = "Aman Chandra Kaushik and Aamir Mehmood and Gurudeeban Selvaraj and Xiaofeng Dai and Yi Pan and Dong-Qing Wei", title = "{CoronaPep}: an Anti-Coronavirus Peptide Generation Tool", journal = j-TCBB, volume = "18", number = "4", pages = "1299--1304", month = jul # "\slash " # aug, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3064630", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:54 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3064630", abstract = "The novel coronavirus (COVID-19) infections have adopted the shape of a global pandemic now, demanding an urgent vaccine design. The current work reports contriving an anti-coronavirus peptide scanner tool to discern anti-coronavirus targets in the \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yu:2021:KEM, author = "Zhimiao Yu and Jiarui Lu and Yuan Jin and Yang Yang", title = "{KenDTI}: an Ensemble Model for Predicting Drug-Target Interaction by Integrating Multi-Source Information", journal = j-TCBB, volume = "18", number = "4", pages = "1305--1314", month = jul # "\slash " # aug, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3074401", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:54 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3074401", abstract = "The identification of drug-target interactions (DTIs) is an essential step in the process of drug discovery. 
As experimental validation suffers from high cost and low success rate, various computational models have been exploited to infer potential DTIs. \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Hu:2021:CNN, author = "ShanShan Hu and DeNan Xia and Benyue Su and Peng Chen and Bing Wang and Jinyan Li", title = "A Convolutional Neural Network System to Discriminate Drug-Target Interactions", journal = j-TCBB, volume = "18", number = "4", pages = "1315--1324", month = jul # "\slash " # aug, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2940187", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:54 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2940187", abstract = "Biological targets are most commonly proteins such as enzymes, ion channels, and receptors. They are anything within a living organism to bind with some other entities (like an endogenous ligand or a drug), resulting in change in their behaviors or \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Liu:2021:FFB, author = "Enze Liu and Jin Li and Garrett H. 
Kinnebrew and Pengyue Zhang and Yan Zhang and Lijun Cheng and Lang Li", title = "A Fast and Furious {Bayesian} Network and Its Application of Identifying Colon Cancer to Liver Metastasis Gene Regulatory Networks", journal = j-TCBB, volume = "18", number = "4", pages = "1325--1335", month = jul # "\slash " # aug, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2944826", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:54 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2944826", abstract = "Bayesian networks is a powerful method for identifying causal relationships among variables. However, as the network size increases, the time complexity of searching the optimal structure grows exponentially. We proposed a novel search algorithm --- Fast \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Carrillo-Cabada:2021:GEM, author = "Hector Carrillo-Cabada and Jeremy Benson and Asghar M. Razavi and Brianna Mulligan and Michel A. 
Cuendet and Harel Weinstein and Michela Taufer and Trilce Estrada", title = "A Graphic Encoding Method for Quantitative Classification of Protein Structure and Representation of Conformational Changes", journal = j-TCBB, volume = "18", number = "4", pages = "1336--1349", month = jul # "\slash " # aug, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2945291", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:54 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2945291", abstract = "In order to successfully predict a protein's function throughout its trajectory, in addition to uncovering changes in its conformational state, it is necessary to employ techniques that maintain its 3D information while performing at scale. We extend a \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2021:LGC, author = "Aiying Zhang and Jian Fang and Wenxing Hu and Vince D.
Calhoun and Yu-Ping Wang", title = "A Latent {Gaussian} Copula Model for Mixed Data Analysis in Brain Imaging Genetics", journal = j-TCBB, volume = "18", number = "4", pages = "1350--1360", month = jul # "\slash " # aug, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2950904", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:54 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2950904", abstract = "Recent advances in imaging genetics make it possible to combine different types of data including medical images like functional magnetic resonance imaging (fMRI) and genetic data like single nucleotide polymorphisms (SNPs) for comprehensive diagnosis of \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Angadi:2021:NWC, author = "Ulavappa B. Angadi and Krishna Kumar Chaturvedi and Sudhir Srivastava and Anil Rai", title = "A Novel Way of Comparing Protein {$3$D} Structure Using Graph Partitioning Approach", journal = j-TCBB, volume = "18", number = "4", pages = "1361--1368", month = jul # "\slash " # aug, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2938948", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:54 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2938948", abstract = "Alignment and comparison of protein 3D structures is an important and fundamental task in structural biology to study evolutionary, functional and structural relatedness among proteins. 
Since two decades, the research on protein structure alignment has \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wang:2021:AMA, author = "Liguang Wang and Yujia Wang and Yi Fu and Yunge Gao and Jiawei Du and Chen Yang and Jianxiao Liu", title = "{AFSBN}: a Method of Artificial Fish Swarm Optimizing {Bayesian} Network for Epistasis Detection", journal = j-TCBB, volume = "18", number = "4", pages = "1369--1383", month = jul # "\slash " # aug, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2949780", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:54 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2949780", abstract = "How to mine the interaction between SNPs (namely epistasis) efficiently and accurately must be considered when to tackle the complexity of underlying biological mechanisms. 
In order to overcome the defect of low learning efficiency and local optimal, this \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yang:2021:CCD, author = "Ziying Yang and Guoxian Yu and Maozu Guo and Jiantao Yu and Xiangliang Zhang and Jun Wang", title = "{CDPath}: Cooperative Driver Pathways Discovery Using Integer Linear Programming and {Markov} Clustering", journal = j-TCBB, volume = "18", number = "4", pages = "1384--1395", month = jul # "\slash " # aug, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2945029", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:54 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2945029", abstract = "Discovering driver pathways is an essential task to understand the pathogenesis of cancer and to design precise treatments for cancer patients. Increasing evidences have been indicating that multiple pathways often function cooperatively in \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Li:2021:CDD, author = "Bing Nan Li and Xinle Wang and Rong Wang and Teng Zhou and Rongke Gao and Edward J. Ciaccio and Peter H. 
Green", title = "Celiac Disease Detection From Videocapsule Endoscopy Images Using Strip Principal Component Analysis", journal = j-TCBB, volume = "18", number = "4", pages = "1396--1404", month = jul # "\slash " # aug, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2953701", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:54 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2953701", abstract = "The purpose of this study was to implement principal component analysis (PCA) on videocapsule endoscopy (VE) images to develop a new computerized tool for celiac disease recognition. Three PCA algorithms were implemented for feature extraction and sparse \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ge:2021:CMB, author = "Yan Ge and Philipp Rosendahl and Claudio Dur{\'a}n and Nicole T{\"o}pfner and Sara Ciucci and Jochen Guck and Carlo Vittorio Cannistraci", title = "Cell Mechanics Based Computational Classification of Red Blood Cells Via Machine Intelligence Applied to Morpho-Rheological Markers", journal = j-TCBB, volume = "18", number = "4", pages = "1405--1415", month = jul # "\slash " # aug, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2945762", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:54 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2945762", abstract = "Despite fluorescent cell-labelling being widely employed in biomedical studies, some of its drawbacks are inevitable, with unsuitable fluorescent probes or probes inducing a functional change being the main limitations. 
Consequently, the demand for and \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2021:CES, author = "Wenju Zhang and Zhewei Liang and Xin Chen and Lei Xin and Baozhen Shan and Zhigang Luo and Ming Li", title = "{ChimST}: an Efficient Spectral Library Search Tool for Peptide Identification from Chimeric Spectra in Data-Dependent Acquisition", journal = j-TCBB, volume = "18", number = "4", pages = "1416--1425", month = jul # "\slash " # aug, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2945954", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:54 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2945954", abstract = "Accurate and sensitive identification of peptides from MS/MS spectra is a very challenging problem in computational shotgun proteomics. To tackle this problem, spectral library search has been one of the competitive solutions. However, most existing \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Maheshwari:2021:CVA, author = "Sidharth Maheshwari and Venkateshwarlu Y. 
Gudur and Rishad Shafik and Ian Wilson and Alex Yakovlev and Amit Acharyya", title = "{CORAL}: Verification-Aware {OpenCL} Based Read Mapper for Heterogeneous Systems", journal = j-TCBB, volume = "18", number = "4", pages = "1426--1438", month = jul # "\slash " # aug, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2943856", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:54 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2943856", abstract = "Genomics has the potential to transform medicine from reactive to a personalized, predictive, preventive, and participatory (P4) form. Being a Big Data application with continuously increasing rate of data production, the computational costs of genomics \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhao:2021:CSP, author = "Yingwen Zhao and Jun Wang and Maozu Guo and Xiangliang Zhang and Guoxian Yu", title = "Cross-Species Protein Function Prediction with Asynchronous-Random Walk", journal = j-TCBB, volume = "18", number = "4", pages = "1439--1450", month = jul # "\slash " # aug, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2943342", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:54 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2943342", abstract = "Protein function prediction is a fundamental task in the post-genomic era. Available functional annotations of proteins are incomplete and the annotations of two homologous species are complementary to each other. However, how to effectively leverage
\ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2021:DNG, author = "Jun Zhang and Qingcai Chen and Bin Liu", title = "{DeepDRBP-2L}: a New Genome Annotation Predictor for Identifying {DNA}-Binding Proteins and {RNA}-Binding Proteins Using Convolutional Neural Network and Long Short-Term Memory", journal = j-TCBB, volume = "18", number = "4", pages = "1451--1463", month = jul # "\slash " # aug, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2952338", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:54 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2952338", abstract = "DNA-binding proteins (DBPs) and RNA-binding proteins (RBPs) are two kinds of crucial proteins, which are associated with various cellule activities and some important diseases. 
Accurate identification of DBPs and RBPs facilitate both theoretical research \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Peng:2021:DPS, author = "Shaoliang Peng and Yaning Yang and Wei Liu and Fei Li and Xiangke Liao", title = "Discriminant Projection Shared Dictionary Learning for Classification of Tumors Using Gene Expression Data", journal = j-TCBB, volume = "18", number = "4", pages = "1464--1473", month = jul # "\slash " # aug, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2950209", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:54 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2950209", abstract = "With a variety of tumor subtypes, personalized treatments need to identify the subtype of a tumor as accurately as possible. The development of DNA microarrays provides an opportunity to predict tumor classification. One strategy is to use gene expression \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ozden:2021:DDR, author = "Furkan Ozden and Metin Can Siper and Necmi Acarsoy and Tugrulcan Elmas and Bryan Marty and Xinjian Qi and A. 
Ercument Cicek", title = "{DORMAN}: {Database of Reconstructed MetAbolic Networks}", journal = j-TCBB, volume = "18", number = "4", pages = "1474--1480", month = jul # "\slash " # aug, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2944905", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:54 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2944905", abstract = "Genome-scale reconstructed metabolic networks have provided an organism specific understanding of cellular processes and their relations to phenotype. As they are deemed essential to study metabolism, the number of organisms with reconstructed metabolic \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Hu:2021:DDS, author = "Yue Hu and Jin-Xing Liu and Ying-Lian Gao and Junliang Shang", title = "{DSTPCA}: Double-Sparse Constrained Tensor Principal Component Analysis Method for Feature Selection", journal = j-TCBB, volume = "18", number = "4", pages = "1481--1491", month = jul # "\slash " # aug, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2943459", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:54 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2943459", abstract = "The identification of differentially expressed genes plays an increasingly important role biologically. Therefore, the feature selection approach has attracted much attention in the field of bioinformatics. 
The most popular method of principal component \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Liao:2021:ESF, author = "Xingyu Liao and Min Li and Junwei Luo and You Zou and Fang-Xiang Wu and Yi Pan and Feng Luo and Jianxin Wang", title = "{EPGA-SC}: a Framework for {\em de novo\/} Assembly of Single-Cell Sequencing Reads", journal = j-TCBB, volume = "18", number = "4", pages = "1492--1503", month = jul # "\slash " # aug, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2945761", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:54 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2945761", abstract = "Assembling genomes from single-cell sequencing data is essential for single-cell studies. However, single-cell assemblies are challenging due to (i) the highly non-uniform read coverage and (ii) the elevated levels of sequencing errors and chimeric reads.
\ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Javadi:2021:FSM, author = "Afsaneh Javadi and Faezeh Keighobadi and Vahab Nekoukar and Marzieh Ebrahimi", title = "Finite-Set Model Predictive Control of Melanoma Cancer Treatment Using Signaling Pathway Inhibitor of Cancer Stem Cell", journal = j-TCBB, volume = "18", number = "4", pages = "1504--1511", month = jul # "\slash " # aug, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2940658", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:54 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2940658", abstract = "Drug delivery is one of the most important issues in the treatment of cancer and surviving the patient. Recently, with a combination of mathematical models of the tumor growth and control theory, optimal drug delivery can be planned, individually. 
The \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wang:2021:GSH, author = "Chao Wang and Lei Gong and Shiming Lei and Haijie Fang and Xi Li and Aili Wang and Xuehai Zhou", title = "{GenSeq+}: a Scalable High-Performance Accelerator for Genome Sequencing", journal = j-TCBB, volume = "18", number = "4", pages = "1512--1523", month = jul # "\slash " # aug, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2947059", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:54 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2947059", abstract = "Genome sequencing is one of the most challenging problems in computational biology and bioinformatics. As a traditional algorithm, the string match meets a challenge with the development of the massive volume of data because of gene sequencing. Surveys \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Xia:2021:GEA, author = "Kaijian Xia and Yizhang Jiang and Yudong Zhang and Wen Si", title = "Guest Editorial: Advanced Machine-Learning Methods for Brain-Machine Interfacing or Brain-Computer Interfacing", journal = j-TCBB, volume = "18", number = "5", pages = "1643--1644", month = sep # "\slash " # oct, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3078145", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:56 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3078145", abstract = "The seven papers in this special section focus on advanced machine learning methods for brain machine interfacing. 
Particular emphasis is on novel theories and methods using transfer learning and deep learning proposed for Brain-Machine Interfacing (BMI) \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Gu:2021:EBB, author = "Xiaotong Gu and Zehong Cao and Alireza Jolfaei and Peng Xu and Dongrui Wu and Tzyy-Ping Jung and Chin-Teng Lin", title = "{EEG}-Based Brain-Computer Interfaces {(BCIs)}: a Survey of Recent Studies on Signal Sensing Technologies and Computational Intelligence Approaches and Their Applications", journal = j-TCBB, volume = "18", number = "5", pages = "1645--1666", month = sep # "\slash " # oct, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3052811", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:56 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3052811", abstract = "Brain-Computer interfaces (BCIs) enhance the capability of human brain activities to interact with the environment. 
Recent advancements in technology and machine learning algorithms have increased interest in electroencephalographic (EEG)-based BCI \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2021:ESR, author = "Yuanpeng Zhang and Ziyuan Zhou and Wenjie Pan and Heming Bai and Wei Liu and Li Wang and Chuang Lin", title = "Epilepsy Signal Recognition Using Online Transfer {TSK} Fuzzy Classifier Underlying Classification Error and Joint Distribution Consensus Regularization", journal = j-TCBB, volume = "18", number = "5", pages = "1667--1678", month = sep # "\slash " # oct, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3002562", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:56 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3002562", abstract = "In this study, an online transfer TSK fuzzy classifier O-T-TSK-FC is proposed for recognizing epilepsy signals. 
Compared with most of the existing transfer learning models, O-T-TSK-FC enjoys its merits from the following three aspects: (1) Since different \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Gu:2021:HDS, author = "Xiaoqing Gu and Cong Zhang and TongGuang Ni", title = "A Hierarchical Discriminative Sparse Representation Classifier for {EEG} Signal Detection", journal = j-TCBB, volume = "18", number = "5", pages = "1679--1687", month = sep # "\slash " # oct, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3006699", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:56 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3006699", abstract = "Classification of electroencephalogram (EEG) signal data plays a vital role in epilepsy detection. 
Recently sparse representation-based classification (SRC) methods have achieved the good performance in EEG signal automatic detection, by which the EEG \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Lv:2021:AML, author = "Zhihan Lv and Liang Qiao and Qingjun Wang and Francesco Piccialli", title = "Advanced Machine-Learning Methods for Brain-Computer Interfacing", journal = j-TCBB, volume = "18", number = "5", pages = "1688--1698", month = sep # "\slash " # oct, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3010014", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:56 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3010014", abstract = "The brain-computer interface (BCI) connects the brain and the external world through an information transmission channel by interpreting the physiological information of the brain during thinking activities. 
The effective classification of \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Lin:2021:MSA, author = "Bo Lin and Shuiguang Deng and Honghao Gao and Jianwei Yin", title = "A Multi-Scale Activity Transition Network for Data Translation in {EEG} Signals Decoding", journal = j-TCBB, volume = "18", number = "5", pages = "1699--1709", month = sep # "\slash " # oct, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3024228", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:56 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3024228", abstract = "Electroencephalogram (EEG) is a non-invasive collection method for brain signals. It has broad prospects in brain-computer interface (BCI) applications. Recent advances have shown the effectiveness of the widely used convolutional neural network (CNN) in \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Liu:2021:SIE, author = "Shuaiqi Liu and Xu Wang and Ling Zhao and Jie Zhao and Qi Xin and Shui-Hua Wang", title = "Subject-Independent Emotion Recognition of {EEG} Signals Based on Dynamic Empirical Convolutional Neural Network", journal = j-TCBB, volume = "18", number = "5", pages = "1710--1721", month = sep # "\slash " # oct, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3018137", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:56 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3018137", abstract = "Affective computing is one of the key technologies to achieve advanced brain-machine 
interfacing. It is increasingly concerning research orientation in the field of artificial intelligence. Emotion recognition is closely related to affective computing. \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Huang:2021:PHI, author = "Chenxi Huang and Yutian Xiao and Gaowei Xu", title = "Predicting Human Intention-Behavior Through {EEG} Signal Analysis Using Multi-Scale {CNN}", journal = j-TCBB, volume = "18", number = "5", pages = "1722--1729", month = sep # "\slash " # oct, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3039834", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:56 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3039834", abstract = "At present, the application of Electroencephalogram (EEG) signal classification to human intention-behavior prediction has become a hot topic in the brain computer interface (BCI) research field. In recent studies, the introduction of convolutional neural \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Huang:2021:GES, author = "De-Shuang Huang and Vitoantonio Bevilacqua and M. 
Michael Gromiha", title = "Guest Editorial for Special Section on the {15th International Conference on Intelligent Computing (ICIC)}", journal = j-TCBB, volume = "18", number = "5", pages = "1730--1732", month = sep # "\slash " # oct, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3065722", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:56 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3065722", abstract = "The papers in this special section were presented at the Fifteenth International Conference on Intelligent Computing (ICIC) held on August 3-6, 2019, in Nanchang, Jiangxi Province, China.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zheng:2021:MIL, author = "Kai Zheng and Zhu-Hong You and Lei Wang and Yi-Ran Li and Ji-Ren Zhou and Hai-Tao Zeng", title = "{MISSIM}: an Incremental Learning-Based Model With Applications to the Prediction of {miRNA}-Disease Association", journal = j-TCBB, volume = "18", number = "5", pages = "1733--1742", month = sep # "\slash " # oct, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3013837", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:56 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3013837", abstract = "In the past few years, the prediction models have shown remarkable performance in most biological correlation prediction tasks. These tasks traditionally use a fixed dataset, and the model, once trained, is deployed as is. 
These models often encounter \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2021:PTD, author = "Qinhu Zhang and Dailun Wang and Kyungsook Han and De-Shuang Huang", title = "Predicting {TF-DNA} Binding Motifs from {ChIP}-seq Datasets Using the Bag-Based Classifier Combined With a Multi-Fold Learning Scheme", journal = j-TCBB, volume = "18", number = "5", pages = "1743--1751", month = sep # "\slash " # oct, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3025007", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:56 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3025007", abstract = "The rapid development of high-throughput sequencing technology provides unique opportunities for studying of transcription factor binding sites, but also brings new computational challenges. 
Recently, a series of discriminative motif discovery (DMD) \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wu:2021:EPE, author = "Hongjie Wu and Huajing Ling and Lei Gao and Qiming Fu and Weizhong Lu and Yijie Ding and Min Jiang and Haiou Li", title = "Empirical Potential Energy Function Toward ab Initio Folding {G} Protein-Coupled Receptors", journal = j-TCBB, volume = "18", number = "5", pages = "1752--1762", month = sep # "\slash " # oct, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3008014", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:56 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3008014", abstract = "Approximately 40--50 percent of all drugs targets are G protein-coupled receptors (GPCRs). Three-dimensional structure of GPCRs is important to probe their biophysical and biochemical functions and their pharmaceutical applications. 
Lacking reliable \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Liu:2021:DPM, author = "Yue Liu and Shu-Lin Wang and Jun-Feng Zhang and Wei Zhang and Su Zhou and Wen Li", title = "{DMFMDA}: Prediction of Microbe-Disease Associations Based on Deep Matrix Factorization Using {Bayesian} Personalized Ranking", journal = j-TCBB, volume = "18", number = "5", pages = "1763--1772", month = sep # "\slash " # oct, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3018138", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:56 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3018138", abstract = "Identifying the microbe-disease associations is conducive to understanding the pathogenesis of disease from the perspective of microbe. In this paper, we propose a deep matrix factorization prediction model (DMFMDA) based on deep neural network. 
First, \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Lu:2021:FFR, author = "Xinguo Lu and Xinyu Wang and Li Ding and Jinxin Li and Yan Gao and Keren He", title = "{frDriver}: a Functional Region Driver Identification for Protein Sequence", journal = j-TCBB, volume = "18", number = "5", pages = "1773--1783", month = sep # "\slash " # oct, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3020096", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:56 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3020096", abstract = "Identifying cancer drivers is a crucial challenge to explain the underlying mechanisms of cancer development. There are many methods to identify cancer drivers based on the single mutation site or the entire gene. 
But they ignore a large number of \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Xie:2021:PEG, author = "Jiang Xie and Chang Zhao and Jiamin Sun and Jiaxin Li and Fuzhang Yang and Jiao Wang and Qing Nie", title = "Prediction of Essential Genes in Comparison States Using Machine Learning", journal = j-TCBB, volume = "18", number = "5", pages = "1784--1792", month = sep # "\slash " # oct, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3027392", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:56 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3027392", abstract = "Identifying essential genes in comparison states (EGS) is vital to understanding cell differentiation, performing drug discovery, and identifying disease causes. Here, we present a machine learning method termed Prediction of Essential Genes in Comparison \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2021:MSC, author = "Qinhu Zhang and Wenbo Yu and Kyungsook Han and Asoke K. 
Nandi and De-Shuang Huang", title = "Multi-Scale Capsule Network for Predicting {DNA-Protein} Binding Sites", journal = j-TCBB, volume = "18", number = "5", pages = "1793--1800", month = sep # "\slash " # oct, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3025579", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:56 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3025579", abstract = "Discovering DNA-protein binding sites, also known as motif discovery, is the foundation for further analysis of transcription factors (TFs). Deep learning algorithms such as convolutional neural networks (CNN) have been introduced to motif discovery task \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Li:2021:DLB, author = "Menglu Li and Yanan Wang and Fuyi Li and Yun Zhao and Mengya Liu and Sijia Zhang and Yannan Bin and A. Ian Smith and Geoffrey I. Webb and Jian Li and Jiangning Song and Junfeng Xia", title = "A Deep Learning-Based Method for Identification of Bacteriophage-Host Interaction", journal = j-TCBB, volume = "18", number = "5", pages = "1801--1810", month = sep # "\slash " # oct, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3017386", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:56 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3017386", abstract = "Multi-drug resistance (MDR) has become one of the greatest threats to human health worldwide, and novel treatment methods of infections caused by MDR bacteria are urgently needed. 
Phage therapy is a promising alternative to solve this problem, to which \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yuan:2021:LOF, author = "Xiguo Yuan and Junping Li and Jun Bai and Jianing Xi", title = "A Local Outlier Factor-Based Detection of Copy Number Variations From {NGS} Data", journal = j-TCBB, volume = "18", number = "5", pages = "1811--1820", month = sep # "\slash " # oct, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2961886", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:56 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2961886", abstract = "Copy number variation (CNV) is a major type of genomic structural variations that play an important role in human disorders. Next generation sequencing (NGS) has fueled the advancement in algorithm design to detect CNVs at base-pair resolution. However, \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Huang:2021:NCP, author = "Hai-Hui Huang and Yong Liang", title = "A Novel {Cox} Proportional Hazards Model for High-Dimensional Genomic Data in Cancer Prognosis", journal = j-TCBB, volume = "18", number = "5", pages = "1821--1830", month = sep # "\slash " # oct, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2961667", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:56 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2961667", abstract = "The Cox proportional hazards model is a popular method to study the connection between feature and survival time. 
Because of the high-dimensionality of genomic data, existing Cox models trained on any specific dataset often generalize poorly to other \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2021:AEP, author = "Jinhao Zhang and Zehua Zhang and Lianrong Pu and Jijun Tang and Fei Guo", title = "{AIEpred}: an Ensemble Predictive Model of Classifier Chain to Identify Anti-Inflammatory Peptides", journal = j-TCBB, volume = "18", number = "5", pages = "1831--1840", month = sep # "\slash " # oct, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.2968419", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:56 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.2968419", abstract = "Anti-inflammatory peptides (AIEs) have recently emerged as promising therapeutic agent for treatment of various inflammatory diseases, such as rheumatoid arthritis and Alzheimer's disease. 
Therefore, detecting the correlation between amino acid \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Qian:2021:AFS, author = "Ying Qian and Yu Zhang and Jiongmin Zhang", title = "Alignment-Free Sequence Comparison With Multiple $k$ Values", journal = j-TCBB, volume = "18", number = "5", pages = "1841--1849", month = sep # "\slash " # oct, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2955081", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:56 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2955081", abstract = "Alignment-free sequence comparison approaches have become increasingly popular in computational biology, because alignment-based approaches are inefficient to process large-scale datasets. Still, there is no way to determine the optimal value of the \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Xu:2021:ACT, author = "Benlian Xu and Jian Shi and Mingli Lu and Jinliang Cong and Ling Wang and Brett Nener", title = "An Automated Cell Tracking Approach With Multi-{Bernoulli} Filtering and Ant Colony Labor Division", journal = j-TCBB, volume = "18", number = "5", pages = "1850--1863", month = sep # "\slash " # oct, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2954502", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:56 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2954502", abstract = "In this article, we take as inspiration the labor division into scouts and workers in an ant colony and propose a 
novel approach for automated cell tracking in the framework of multi-Bernoulli random finite sets. To approximate the Bernoulli parameter \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Kumar:2021:AAP, author = "Gautam Kumar and Rajnish Kumar and Manoj Kumar Pal and Nilotpal Pramanik and Tapobrata Lahiri and Ankita Gupta and Saket Pandey", title = "{APT}: an Automated Probe Tracker From Gene Expression Data", journal = j-TCBB, volume = "18", number = "5", pages = "1864--1874", month = sep # "\slash " # oct, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2958345", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:56 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2958345", abstract = "Out of currently available semi-automatic tools for detecting diagnostic probes relevant to a pathophysiological condition, ArrayMining and GEO2R of NCBI are most popular. 
The shortcomings of ArrayMining and GEO2R are that both tools list the probes \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2021:BSM, author = "Yue Zhang and Chunfang Zheng and Sindeed Islam and Yong-Min Kim and David Sankoff", title = "Branching Out to Speciation in a Model of Fractionation: The {Malvaceae}", journal = j-TCBB, volume = "18", number = "5", pages = "1875--1884", month = sep # "\slash " # oct, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2955649", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:56 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2955649", abstract = "Fractionation is the genome-wide process of losing one gene per duplicate pair following whole genome doubling (WGD). An important type of evidence for duplicate gene loss is the frequency distribution of similarities between paralogous gene pairs in a \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ghasemnejad:2021:DNM, author = "Atefeh Ghasemnejad and Samira Bazmara and Mahsa Shadmani and Kamran Pooshang Bagheri", title = "Designing a New Multi-Epitope Pertussis Vaccine with Highly Population Coverage Based on a Novel Sequence and Structural Filtration Algorithm", journal = j-TCBB, volume = "18", number = "5", pages = "1885--1892", month = sep # "\slash " # oct, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2958803", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:56 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2958803", 
abstract = "Pertussis vaccine is produced from physicochemically inactivated toxin for many years. Recent advancements in immunoinformatics [N. Tomar and R. K. De, ``Immunoinformatics: an integrated scenario,'' {\em Immunology}, vol. 131, no. 2, \ldots{}]", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yuan:2021:ENS, author = "Xiguo Yuan and Xiangyan Xu and Haiyong Zhao and Junbo Duan", title = "{ERINS}: Novel Sequence Insertion Detection by Constructing an Extended Reference", journal = j-TCBB, volume = "18", number = "5", pages = "1893--1901", month = sep # "\slash " # oct, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2954315", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:56 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2954315", abstract = "Next generation sequencing technology has led to the development of methods for the detection of novel sequence insertions (nsINS). Multiple signatures from short reads are usually extracted to improve nsINS detection performance. 
However, \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Kumari:2021:EMD, author = "Chetna Kumari and Muhammad Abulaish and Naidu Subbarao", title = "Exploring Molecular Descriptors and Fingerprints to Predict {mTOR} Kinase Inhibitors using Machine Learning Techniques", journal = j-TCBB, volume = "18", number = "5", pages = "1902--1913", month = sep # "\slash " # oct, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.2964203", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:56 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.2964203", abstract = "Mammalian Target of Rapamycin (mTOR) is a Ser/Thr protein kinase, and its role is integral to the autophagy pathway in cancer. Targeting mTOR for therapeutic interventions in cancer through autophagy pathway is challenging due to the dual roles of \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Chen:2021:FSB, author = "Jiaojiao Chen and Jianbo Jiao and Shengfeng He and Guoqiang Han and Jing Qin", title = "Few-Shot Breast Cancer Metastases Classification via Unsupervised Cell Ranking", journal = j-TCBB, volume = "18", number = "5", pages = "1914--1923", month = sep # "\slash " # oct, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2960019", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:56 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2960019", abstract = "Tumor metastases detection is of great importance for the treatment of breast cancer patients. 
Various CNN (convolutional neural network) based methods get excellent performance in object detection/segmentation. However, the detection of metastases in \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Medhat:2021:FRA, author = "Belal Medhat and Ahmed Shawish", title = "{FLR}: a Revolutionary Alignment-Free Similarity Analysis Methodology for {DNA}-Sequences", journal = j-TCBB, volume = "18", number = "5", pages = "1924--1936", month = sep # "\slash " # oct, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.2967385", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:56 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.2967385", abstract = "This paper introduces a novel alignment-free sequence analysis methodology. Its main idea is based on introducing a new representation of the DNA-Sequence. 
This representation breaks the dependency between the DNA bases that exist in the traditional \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Jia:2021:FPL, author = "Cangzhi Jia and Meng Zhang and Cunshuo Fan and Fuyi Li and Jiangning Song", title = "Formator: Predicting {Lysine} Formylation Sites Based on the Most Distant Undersampling and Safe-Level Synthetic Minority Oversampling", journal = j-TCBB, volume = "18", number = "5", pages = "1937--1945", month = sep # "\slash " # oct, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2957758", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:56 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2957758", abstract = "Lysine formylation is a reversible type of protein post-translational modification and has been found to be involved in a myriad of biological processes, including modulation of chromatin conformation and gene expression in histones and other nuclear \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Aluru:2021:ENE, author = "Srinivas Aluru", title = "Editorial: From the New {Editor-in-Chief}", journal = j-TCBB, volume = "18", number = "6", pages = "2058--2058", month = nov # "\slash " # dec, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3108133", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:58 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3108133", abstract = "I am delighted to have the opportunity to serve you as the next editor-in-chief of this 
prestigious journal, beginning August 2021. I regard the IEEE/ACM Transactions on Computational Biology and Bioinformatics (TCBB) as the premier journal devoted to the \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Martin-Vide:2021:ACBb, author = "Carlos Mart{\'\i}n-Vide and Miguel A. Vega-Rodr{\'\i}guez", title = "{{\booktitle{Algorithms for Computational Biology}}}: Seventh Edition", journal = j-TCBB, volume = "18", number = "6", pages = "2059--2060", month = nov # "\slash " # dec, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3099915", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:58 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3099915", abstract = "The papers in this special section were presented at the Seventh International Conference on Algorithms for Computational Biology, AlCoB 2020, held in Missoula, Montana on November 8-11, 2021 merged with AlCoB 2021. 
The conference was organized by the \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Debnath:2021:FOC, author = "Tathagata Debnath and Mingzhou Song", title = "Fast Optimal Circular Clustering and Applications on Round Genomes", journal = j-TCBB, volume = "18", number = "6", pages = "2061--2071", month = nov # "\slash " # dec, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3077573", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:58 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3077573", abstract = "Round genomes are found in bacteria, plant chloroplasts, and mitochondria. Genetic or epigenetic marks can present biologically interesting clusters along a circular genome. The circular data clustering problem groups \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Liang:2021:STA, author = "Shaoheng Liang and Qingnan Liang and Rui Chen and Ken Chen", title = "Stratified Test Accurately Identifies Differentially Expressed Genes Under Batch Effects in Single-Cell Data", journal = j-TCBB, volume = "18", number = "6", pages = "2072--2079", month = nov # "\slash " # dec, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3094650", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:58 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3094650", abstract = "Analyzing single-cell sequencing data from large cohorts is challenging. 
Discrepancies across experiments and differences among participants often lead to omissions and false discoveries in differentially expressed genes. We find that the Van Elteren test, \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Oliveira:2021:SPI, author = "Andre Rodrigues Oliveira and G{\'e}raldine Jean and Guillaume Fertin and Klairton Lima Brito and Ulisses Dias and Zanoni Dias", title = "Sorting Permutations by Intergenic Operations", journal = j-TCBB, volume = "18", number = "6", pages = "2080--2093", month = nov # "\slash " # dec, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3077418", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:58 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3077418", abstract = "Genome Rearrangements are events that affect large stretches of genomes during evolution. Many mathematical models have been used to estimate the evolutionary distance between two genomes based on genome rearrangements. 
However, most of them focused on \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Siqueira:2021:HGR, author = "Gabriel Siqueira and Klairton Lima Brito and Ulisses Dias and Zanoni Dias", title = "Heuristics for Genome Rearrangement Distance With Replicated Genes", journal = j-TCBB, volume = "18", number = "6", pages = "2094--2108", month = nov # "\slash " # dec, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3095021", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:58 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3095021", abstract = "In comparative genomics, one goal is to find similarities between genomes of different organisms. Comparisons using genome features like genes, gene order, and regulatory sequences are carried out with this purpose in mind. 
Genome rearrangements are \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Steinauer:2021:CMG, author = "Nickolas Steinauer and Kevin Zhang and Chun Guo and Jinsong Zhang", title = "Computational Modeling of Gene-Specific Transcriptional Repression, Activation and Chromatin Interactions in Leukemogenesis by {LASSO}-Regularized Logistic Regression", journal = j-TCBB, volume = "18", number = "6", pages = "2109--2122", month = nov # "\slash " # dec, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3078128", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:58 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3078128", abstract = "Many physiological and pathological pathways are dependent on gene-specific on/off regulation of transcription. Some genes are repressed, while others are activated. 
Although many previous studies have analyzed the mechanisms of gene-specific repression \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2021:GEA, author = "Louxin Zhang and Shaoliang Peng and Yi-Ping Phoebe Chen and David Sankoff and Guoliang Li and Hong-Yu Zhang", title = "Guest Editorial for the {17th Asia Pacific Bioinformatics Conference}", journal = j-TCBB, volume = "18", number = "6", pages = "2123--2124", month = nov # "\slash " # dec, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3099948", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:58 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3099948", abstract = "The eight papers in this special section were presented at the 17th Asia Pacific Bioinformatics Conference (APBC), which was held in Wuhan, China, 14-16 January 2019.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Gorecki:2021:UDD, author = "Pawe{\l} G{\'o}recki and Oliver Eulenstein and Jerzy Tiuryn", title = "The Unconstrained Diameters of the Duplication-Loss Cost and the Loss Cost", journal = j-TCBB, volume = "18", number = "6", pages = "2125--2135", month = nov # "\slash " # dec, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2919617", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:58 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2919617", abstract = "Tree reconciliation costs are a popular choice to account for the discordance between the evolutionary history of a gene family (i.e., a gene 
tree), and the species tree through which this family has evolved. This discordance is accounted for by the \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Grueter:2021:RRS, author = "Melissa Grueter and Kalani Duran and Ramya Ramalingam and Ran Libeskind-Hadas", title = "Reconciliation Reconsidered: In Search of a Most Representative Reconciliation in the Duplication-Transfer-Loss Model", journal = j-TCBB, volume = "18", number = "6", pages = "2136--2143", month = nov # "\slash " # dec, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2942015", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:58 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2942015", abstract = "Maximum parsimony reconciliation is a fundamental technique for studying the evolutionary histories of pairs of entities such as genes and species, parasites and hosts, and species and their biogeographical habitats. 
In these contexts, reconciliation is \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Du:2021:MOR, author = "Haoxing Du and Yi Sheng Ong and Marina Knittel and Ross Mawhorter and Nuo Liu and Gianluca Gross and Reiko Tojo and Ran Libeskind-Hadas and Yi-Chieh Wu", title = "Multiple Optimal Reconciliations Under the Duplication-Loss-Coalescence Model", journal = j-TCBB, volume = "18", number = "6", pages = "2144--2156", month = nov # "\slash " # dec, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2922337", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:58 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2922337", abstract = "Gene trees can differ from species trees due to a variety of biological phenomena, the most prevalent being gene duplication, horizontal gene transfer, gene loss, and coalescence. 
To explain topological incongruence between the two trees, researchers \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Guo:2021:DMS, author = "Hongzhe Guo and Yilei Fu and Yan Gao and Junyi Li and Yadong Wang and Bo Liu", title = "{deGSM}: Memory Scalable Construction Of Large Scale {de Bruijn} Graph", journal = j-TCBB, volume = "18", number = "6", pages = "2157--2166", month = nov # "\slash " # dec, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2913932", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:58 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2913932", abstract = "The de Bruijn graph, a fundamental data structure to represent and organize genome sequence, plays important roles in various kinds of sequence analysis tasks. With the rapid development of HTS data and ever-increasing number of assembled genomes, there \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Su:2021:TOD, author = "Cui Su and Jun Pang and Soumya Paul", title = "Towards Optimal Decomposition of {Boolean} Networks", journal = j-TCBB, volume = "18", number = "6", pages = "2167--2176", month = nov # "\slash " # dec, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2914051", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:58 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2914051", abstract = "In recent years, great efforts have been made to analyze biological systems to understand the long-run behaviors. 
As a well-established formalism for modelling real-life biological systems, Boolean networks (BNs) allow their representation and analysis \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Hartmann:2021:SSP, author = "Tom Hartmann and Max Bannach and Martin Middendorf", title = "Sorting Signed Permutations by Inverse Tandem Duplication Random Losses", journal = j-TCBB, volume = "18", number = "6", pages = "2177--2188", month = nov # "\slash " # dec, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2917198", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:58 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2917198", abstract = "Gene order evolution of unichromosomal genomes, for example mitochondrial genomes, has been modelled mostly by four major types of genome rearrangements: inversions, transpositions, inverse transpositions, and tandem duplication random losses. \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Le:2021:PFB, author = "Nguyen Quoc Khanh Le and Binh P. 
Nguyen", title = "Prediction of {FMN} Binding Sites in Electron Transport Chains Based on {$2$-D} {CNN} and {PSSM} Profiles", journal = j-TCBB, volume = "18", number = "6", pages = "2189--2197", month = nov # "\slash " # dec, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2932416", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:58 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2932416", abstract = "Flavin mono-nucleotides (FMNs) are cofactors that hold responsibility for carrying and transferring electrons in the electron transport chain stage of cellular respiration. Without being facilitated by FMNs, energy production is stagnant due to the \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ozgul:2021:CDC, author = "Ozan Firat {\"O}zg{\"u}l and Batuhan Bardak and Mehmet Tan", title = "A Convolutional Deep Clustering Framework for Gene Expression Time Series", journal = j-TCBB, volume = "18", number = "6", pages = "2198--2207", month = nov # "\slash " # dec, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.2988985", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:58 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.2988985", abstract = "The functional or regulatory processes within the cell are explicitly governed by the expression levels of a subset of its genes. 
Gene expression time series captures activities of individual genes over time and aids revealing underlying cellular \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2021:DLF, author = "Fuhao Zhang and Hong Song and Min Zeng and Fang-Xiang Wu and Yaohang Li and Yi Pan and Min Li", title = "A Deep Learning Framework for Gene Ontology Annotations With Sequence- and Network-Based Information", journal = j-TCBB, volume = "18", number = "6", pages = "2208--2217", month = nov # "\slash " # dec, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.2968882", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:58 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.2968882", abstract = "Knowledge of protein functions plays an important role in biology and medicine. With the rapid development of high-throughput technologies, a huge number of proteins have been discovered. 
However, there are a great number of proteins without functional \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wang:2021:MCI, author = "Zilu Wang and Qinghui Hong and Xiaoping Wang", title = "A Memristive Circuit Implementation of Eyes State Detection in Fatigue Driving Based on Biological Long Short-Term Memory Rule", journal = j-TCBB, volume = "18", number = "6", pages = "2218--2229", month = nov # "\slash " # dec, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.2974944", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:58 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.2974944", abstract = "Biological long short-term memory (B-LSTM) can effectively help human process all kinds of received information. In this work, a memristive B-LSTM circuit which mimics a conversion from short-term memory to long-term memory is proposed. That is, the \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{daSilva:2021:NFS, author = "Pablo Nascimento da Silva and Alexandre Plastino and Fabio Fabris and Alex A. 
Freitas", title = "A Novel Feature Selection Method for Uncertain Features: an Application to the Prediction of {Pro-\slash} Anti-Longevity Genes", journal = j-TCBB, volume = "18", number = "6", pages = "2230--2238", month = nov # "\slash " # dec, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.2988450", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:58 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.2988450", abstract = "Understanding the ageing process is a very challenging problem for biologists. To help in this task, there has been a growing use of classification methods (from machine learning) to learn models that predict whether a gene influences the process of \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Li:2021:AMI, author = "Dong Li and Zhisong Pan and Guyu Hu and Graham Anderson and Shan He", title = "Active Module Identification From Multilayer Weighted Gene Co-Expression Networks: a Continuous Optimization Approach", journal = j-TCBB, volume = "18", number = "6", pages = "2239--2248", month = nov # "\slash " # dec, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.2970400", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:58 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.2970400", abstract = "Searching for active modules, i.e., regions showing striking changes in molecular activity in biological networks is important to reveal regulatory and signaling mechanisms of biological systems. 
Most existing active modules identification methods are \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Fang:2021:EBB, author = "Qiong Fang and Dewei Su and Wilfred Ng and Jianlin Feng", title = "An Effective Biclustering-Based Framework for Identifying Cell Subpopulations From {scRNA}-seq Data", journal = j-TCBB, volume = "18", number = "6", pages = "2249--2260", month = nov # "\slash " # dec, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.2979717", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:58 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.2979717", abstract = "The advent of single-cell RNA sequencing (scRNA-seq) techniques opens up new opportunities for studying the cell-specific changes in the transcriptomic data. An important research problem related with scRNA-seq data analysis is to identify cell \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Chen:2021:CMC, author = "Junyi Chen and Junhui Hou and Ka-Chun Wong", title = "Categorical Matrix Completion With Active Learning for High-Throughput Screening", journal = j-TCBB, volume = "18", number = "6", pages = "2261--2270", month = nov # "\slash " # dec, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.2982142", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:58 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.2982142", abstract = "The recent advances in wet-lab automation enable high-throughput experiments to be conducted seamlessly. 
In particular, the exhaustive enumeration of all possible conditions is always involved in high-throughput screening. Nonetheless, such a screening \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ogundijo:2021:CIT, author = "Oyetunji E. Ogundijo and Kaiyi Zhu and Xiaodong Wang and Dimitris Anastassiou", title = "Characterizing Intra-Tumor Heterogeneity From Somatic Mutations Without Copy-Neutral Assumption", journal = j-TCBB, volume = "18", number = "6", pages = "2271--2280", month = nov # "\slash " # dec, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.2973635", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:58 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.2973635", abstract = "Bulk samples of the same patient are heterogeneous in nature, comprising of different subpopulations (subclones) of cancer cells. Cells in a tumor subclone are characterized by unique mutational genotype profile. 
Resolving tumor heterogeneity by \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yuan:2021:CMC, author = "Shaoxun Yuan and Haitao Li and Jiansheng Wu and Xiao Sun", title = "Classification of Mild Cognitive Impairment With Multimodal Data Using Both Labeled and Unlabeled Samples", journal = j-TCBB, volume = "18", number = "6", pages = "2281--2290", month = nov # "\slash " # dec, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3053061", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:58 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3053061", abstract = "Mild Cognitive Impairment (MCI) is a preclinical stage of Alzheimer's Disease (AD) and is clinical heterogeneity. The classification of MCI is crucial for the early diagnosis and treatment of AD. 
In this study, we investigated the potential of \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Lyne:2021:CPA, author = "Anne-Marie Lyne and Le{\"\i}la Peri{\'e}", title = "Comparing Phylogenetic Approaches to Reconstructing Cell Lineage From Microsatellites With Missing Data", journal = j-TCBB, volume = "18", number = "6", pages = "2291--2301", month = nov # "\slash " # dec, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.2992813", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:58 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.2992813", abstract = "Due to the imperfect fidelity of DNA replication, somatic cells acquire DNA mutations at each division which record their lineage history. Microsatellites, tandem repeats of DNA nucleotide motifs, mutate more frequently than other genomic regions and by \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Blanco:2021:CAS, author = "Guillermo Blanco and Borja S{\'a}nchez and Lorena Ruiz and Florentino Fdez-Riverola and Abelardo Margolles and An{\'a}lia Louren{\c{c}}o", title = "Computational Approach to the Systematic Prediction of Glycolytic Abilities: Looking Into Human Microbiota", journal = j-TCBB, volume = "18", number = "6", pages = "2302--2313", month = nov # "\slash " # dec, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.2978461", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:58 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.2978461", abstract = "Glycoside 
hydrolases are responsible for the enzymatic deconstruction of complex carbohydrates. Most of the families are known to conserve the catalytic machinery and molecular mechanisms. This work introduces a new method to predict glycolytic abilities \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Willing:2021:CII, author = "Eyla Willing and Jens Stoye and Mar{\'\i}lia D. V. Braga", title = "Computing the Inversion-Indel Distance", journal = j-TCBB, volume = "18", number = "6", pages = "2314--2326", month = nov # "\slash " # dec, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.2988950", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:58 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.2988950", abstract = "The inversion distance, that is the distance between two unichromosomal genomes with the same content allowing only inversions of DNA segments, can be exactly computed thanks to a pioneering approach of Hannenhalli and Pevzner from 1995. In 2000, El-Mabrouk 
\ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ji:2021:CNN, author = "Junzhong Ji and Yao Yao", title = "Convolutional Neural Network With Graphical Lasso to Extract Sparse Topological Features for Brain Disease Classification", journal = j-TCBB, volume = "18", number = "6", pages = "2327--2338", month = nov # "\slash " # dec, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.2989315", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:58 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.2989315", abstract = "The functional connectivity provides new insights into the mechanisms of the human brain at network-level, which has been proved to be an effective biomarker for brain disease classification. Recently, machine learning methods have played an important \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Occhipinti:2021:DEM, author = "Annalisa Occhipinti and Youssef Hamadi and Hillel Kugler and Christoph M. 
Wintersteiger and Boyan Yordanov and Claudio Angione", title = "Discovering Essential Multiple Gene Effects Through Large Scale Optimization: an Application to Human Cancer Metabolism", journal = j-TCBB, volume = "18", number = "6", pages = "2339--2352", month = nov # "\slash " # dec, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.2973386", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:58 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.2973386", abstract = "Computational modelling of metabolic processes has proven to be a useful approach to formulate our knowledge and improve our understanding of core biochemical systems that are crucial to maintaining cellular functions. Towards understanding the broader \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zeng:2021:DDL, author = "Min Zeng and Chengqian Lu and Zhihui Fei and Fang-Xiang Wu and Yaohang Li and Jianxin Wang and Min Li", title = "{DMFLDA}: a Deep Learning Framework for Predicting {lncRNA}--Disease Associations", journal = j-TCBB, volume = "18", number = "6", pages = "2353--2363", month = nov # "\slash " # dec, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.2983958", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Apr 20 07:14:58 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.2983958", abstract = "A growing amount of evidence suggests that long non-coding RNAs (lncRNAs) play important roles in the regulation of biological processes in many human diseases. However, the number of experimentally verified lncRNA-disease associations is very limited. 
\ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Sun:2022:E, author = "Sunny Sun and Yi-Ping Phoebe Chen", title = "Editorial", journal = j-TCBB, volume = "19", number = "1", pages = "1--2", month = jan, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3089195", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:18 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3089195", abstract = "Presents the introductory editorial for this issue of the publication.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Bi:2022:RSF, author = "Hui Bi and Shumei Cao and Hanying Yan and Zhongyi Jiang and Jun Zhang and Ling Zou", title = "Resting State Functional Connectivity Analysis During General Anesthesia: a High-Density {EEG} Study", journal = j-TCBB, volume = "19", number = "1", pages = "3--13", month = jan, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3091000", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:18 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3091000", abstract = "The depth of anesthesia monitoring is helpful to guide administrations of general anesthetics during surgical procedures, however, the conventional 2-4 channels electroencephalogram (EEG) derived monitors have their limitations in monitoring conscious \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Craveur:2022:SB, author = "Pierrick Craveur and 
Tarun J. Narwani and Narayanaswamy Srinivasan and Jean-Christophe Gelly and Joseph Rebehmed and Alexandre G. de Brevern", title = "Shaking the $ \beta $-Bulges", journal = j-TCBB, volume = "19", number = "1", pages = "14--18", month = jan, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3088444", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:18 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3088444", abstract = "$ \beta $-bulges are irregularities inside the $ \beta $-sheets. They represent more than 3 percent of the protein residues, i.e., they are as frequent as 3.$_{10}$ helices. In terms of evolution, $ \beta $-bulges are not more conserved than any other \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Paul:2022:NFS, author = "Madhusudan Paul and Ashish Anand", title = "A New Family of Similarity Measures for Scoring Confidence of Protein Interactions Using Gene Ontology", journal = j-TCBB, volume = "19", number = "1", pages = "19--30", month = jan, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3083150", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:18 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3083150", abstract = "The large-scale protein-protein interaction (PPI) data has the potential to play a significant role in the endeavor of understanding cellular processes. 
However, the presence of a considerable fraction of false positives is a bottleneck in realizing this \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Huang:2022:CDV, author = "Runhua Huang and Chengchuang Lin and Aihua Yin and Hanbiao Chen and Li Guo and Gansen Zhao and Xiaomao Fan and Shuangyin Li and Jinji Yang", title = "A Clinical Dataset and Various Baselines for Chromosome Instance Segmentation", journal = j-TCBB, volume = "19", number = "1", pages = "31--39", month = jan, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3089507", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:18 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3089507", abstract = "{\em Background\/}: In medicine, chromosome karyotyping analysis plays a crucial role in prenatal diagnosis for diagnosing whether a fetus has severe defects or genetic diseases. 
However, chromosome instance segmentation is the most critical \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Qian:2022:PSC, author = "Ying Qian and Xuelian Li and Qian Zhang and Jiongmin Zhang", title = "\pkg{SPP-CPI}: Predicting Compound--Protein Interactions Based On Neural Networks", journal = j-TCBB, volume = "19", number = "1", pages = "40--47", month = jan, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3084397", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:18 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3084397", abstract = "Identifying interactions between compound and protein is a substantial part of the drug discovery process. Accurate prediction of interaction relationships can greatly reduce the time of drug development. The uniqueness of our method lies in three aspects: 
\ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhao:2022:PMA, author = "Jin Zhao and Haodi Feng and Daming Zhu and Yu Lin", title = "\pkg{MultiTrans}: an Algorithm for Path Extraction Through Mixed Integer Linear Programming for Transcriptome Assembly", journal = j-TCBB, volume = "19", number = "1", pages = "48--56", month = jan, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3083277", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:18 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3083277", abstract = "Recent advances in RNA-seq technology have made identification of expressed genes affordable, and thus boosting repaid development of transcriptomic studies. Transcriptome assembly, reconstructing all expressed transcripts from RNA-seq reads, is an \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wickramarachchi:2022:PGR, author = "Anuradha Wickramarachchi and Yu Lin", title = "\pkg{GraphPlas}: Refined Classification of Plasmid Sequences Using Assembly Graphs", journal = j-TCBB, volume = "19", number = "1", pages = "57--67", month = jan, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3082915", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:18 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3082915", abstract = "Plasmids are extra-chromosomal genetic materials with important markers that affect the function and behaviour of the microorganisms supporting their environmental adaptations. 
Hence the identification and recovery of such plasmid sequences from \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Lu:2022:LSS, author = "Shuai Lu and Yuguang Li and Fei Wang and Xiaofei Nan and Shoutao Zhang", title = "Leveraging Sequential and Spatial Neighbors Information by Using {CNNs} Linked With {GCNs} for Paratope Prediction", journal = j-TCBB, volume = "19", number = "1", pages = "68--74", month = jan, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3083001", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:18 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3083001", abstract = "Antibodies consisting of variable and constant regions, are a special type of proteins playing a vital role in immune system of the vertebrate. They have the remarkable ability to bind a large range of diverse antigens with extraordinary affinity and \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yang:2022:AMI, author = "Tzu-Hsien Yang", title = "An Aggregation Method to Identify the {RNA} Meta-Stable Secondary Structure and its Functionally Interpretable Structure Ensemble", journal = j-TCBB, volume = "19", number = "1", pages = "75--86", month = jan, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3082396", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:18 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3082396", abstract = "RNA can provide vital cellular functions through its secondary or tertiary structure. 
Due to the low-throughput nature of experimental approaches, studies on RNA structures mainly resort to computational methods. However, current existing tools fail to \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Nguyen:2022:EED, author = "Trinh-Trung-Duong Nguyen and The-Anh Tran and Nguyen-Quoc-Khanh Le and Dinh-Minh Pham and Yu-Yen Ou", title = "An Extensive Examination of Discovering 5-Methylcytosine Sites in Genome-Wide {DNA} Promoters Using Machine Learning Based Approaches", journal = j-TCBB, volume = "19", number = "1", pages = "87--94", month = jan, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3082184", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:18 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3082184", abstract = "It is well-known that the major reason for the rapid proliferation of cancer cells are the hypomethylation of the whole cancer genome and the hypermethylation of the promoter of particular tumor suppressor genes. 
Locating 5-methylcytosine (5mC) sites in \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{DiCamillo:2022:GED, author = "Barbara {Di Camillo} and Giuseppe Nicosia", title = "Guest Editorial: Deep Learning For Genomics", journal = j-TCBB, volume = "19", number = "1", pages = "95--96", month = jan, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3080094", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:18 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3080094", abstract = "The six papers in this special section focus on deep learning for genomics. Thanks to the development of high-throughput technologies, a huge amount of omics data is being produced relative to DNA and RNA sequences and (and also) abundance at individual \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Clauwaert:2022:NTN, author = "Jim Clauwaert and Willem Waegeman", title = "Novel Transformer Networks for Improved Sequence Labeling in genomics", journal = j-TCBB, volume = "19", number = "1", pages = "97--106", month = jan, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3035021", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:18 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3035021", abstract = "In genomics, a wide range of machine learning methodologies have been investigated to annotate biological sequences for positions of interest such as transcription start sites, translation initiation sites, methylation sites, splice sites and promoter 
\ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Chen:2022:ECS, author = "Zhong Chen and Wensheng Zhang and Hongwen Deng and Kun Zhang", title = "Effective Cancer Subtype and Stage Prediction via Dropfeature-{DNNs}", journal = j-TCBB, volume = "19", number = "1", pages = "107--120", month = jan, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3058941", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:18 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3058941", abstract = "Precise cancer subtype and/or stage prediction is instrumental for cancer diagnosis, treatment and management. However, most of the existing methods based on genomic profiles suffer from issues such as overfitting, high computational complexity and \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Cristovao:2022:IDL, author = "Francisco Cristovao and Silvia Cascianelli and Arif Canakoglu and Mark Carman and Luca Nanni and Pietro Pinoli and Marco Masseroli", title = "Investigating Deep Learning Based Breast Cancer Subtyping Using Pan-Cancer and Multi-Omic Data", journal = j-TCBB, volume = "19", number = "1", pages = "121--134", month = jan, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3042309", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:18 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3042309", abstract = "Breast Cancer comprises multiple subtypes implicated in prognosis. 
Existing stratification methods rely on the expression quantification of small gene sets. Next Generation Sequencing promises large amounts of omic data in the next years. In this scenario, \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Viaud:2022:RLC, author = "Gautier Viaud and Prasanna Mayilvahanan and Paul-Henry Courn{\`e}de", title = "Representation Learning for the Clustering of Multi-Omics Data", journal = j-TCBB, volume = "19", number = "1", pages = "135--145", month = jan, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3060340", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:18 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3060340", abstract = "The integration of several sources of data for the identification of subtypes of diseases has gained attention over the past few years. The heterogeneity and the high dimensions of the data sets calls for an adequate representation of the data. We \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Nguyen:2022:GCN, author = "Tuan Nguyen and Giang T. T. 
Nguyen and Thin Nguyen and Duc-Hau Le", title = "Graph Convolutional Networks for Drug Response Prediction", journal = j-TCBB, volume = "19", number = "1", pages = "146--154", month = jan, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3060430", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:18 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3060430", abstract = "{\em Background\/}: Drug response prediction is an important problem in computational personalized medicine. Many machine-learning-based methods, especially deep learning-based ones, have been proposed for this task. However, these methods often \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Mahapatra:2022:DNN, author = "Satyajit Mahapatra and Vivek Raj Gupta and Sitanshu Sekhar Sahu and Ganapati Panda", title = "Deep Neural Network and Extreme Gradient Boosting Based Hybrid Classifier for Improved Prediction of Protein-Protein Interaction", journal = j-TCBB, volume = "19", number = "1", pages = "155--165", month = jan, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3061300", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:18 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3061300", abstract = "Understanding the behavioral process of life and disease-causing mechanism, knowledge regarding protein-protein interactions (PPI) is essential. 
In this paper, a novel hybrid approach combining deep neural network (DNN) and extreme gradient boosting \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Cai:2022:GEIa, author = "Zhipeng Cai and Min Li and Pavel Skums", title = "{Guest Editors}' Introduction to the Special Section on Bioinformatics Research and Applications", journal = j-TCBB, volume = "19", number = "1", pages = "166--167", month = jan, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3123699", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:18 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3123699", abstract = "The papers in this special section were presented at the 15th International Symposium on Bioinformatics Research and Applications (ISBRA 2019), which was held at Technical University of Catalonia, Barcelona, Spain on June 3-6, 2019.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yan:2022:PDD, author = "Cheng Yan and Guihua Duan and Yayan Zhang and Fang-Xiang Wu and Yi Pan and Jianxin Wang", title = "Predicting Drug-Drug Interactions Based on Integrated Similarity and Semi-Supervised Learning", journal = j-TCBB, volume = "19", number = "1", pages = "168--179", month = jan, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.2988018", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:18 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.2988018", abstract = "A drug-drug interaction (DDI) is defined as an association between two drugs where the pharmacological 
effects of a drug are influenced by another drug. Positive DDIs can usually improve the therapeutic effects of patients, but negative DDIs cause the \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhao:2022:CAD, author = "Fengpan Zhao and Pavel Skums and Alex Zelikovsky and Eric L. Sevigny and Monica Haavisto Swahn and Sheryl M. Strasser and Yan Huang and Yubao Wu", title = "Computational Approaches to Detect Illicit Drug Ads and Find Vendor Communities Within Social Media Platforms", journal = j-TCBB, volume = "19", number = "1", pages = "180--191", month = jan, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.2978476", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:18 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.2978476", abstract = "The opioid abuse epidemic represents a major public health threat to global populations. The role social media may play in facilitating illicit drug trade is largely unknown due to limited research. 
However, it is known that social media use among adults \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Acharya:2022:RFP, author = "Sudipta Acharya and Laizhong Cui and Yi Pan", title = "A Refined 3-in-1 Fused Protein Similarity Measure: Application in Threshold-Free Hub Detection", journal = j-TCBB, volume = "19", number = "1", pages = "192--206", month = jan, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.2973563", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:18 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.2973563", abstract = "An exhaustive literature survey shows that finding protein/gene similarity is an important step towards solving widespread bioinformatics problems, such as predicting protein-protein interactions, analyzing Protein-Protein Interaction Networks (PPINs), \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Cai:2022:GEIb, author = "Zhipeng Cai and Giri Narasimhan and Pavel Skums", title = "{Guest Editors}' Introduction to the Special Section on Bioinformatics Research and Applications", journal = j-TCBB, volume = "19", number = "1", pages = "207--208", month = jan, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3121736", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:18 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3121736", abstract = "The papers in this special section were presented at the 16th International Symposium on Bioinformatics Research and Applications (ISBRA 2020), which was 
held virtually, on December 1-4, 2020. The ISBRA symposium provides a forum for the exchange of ideas \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Gao:2022:MSC, author = "Shan Gao and Renmin Han and Xiangrui Zeng and Zhiyong Liu and Min Xu and Fa Zhang", title = "Macromolecules Structural Classification With a {$3$D} Dilated Dense Network in Cryo-Electron Tomography", journal = j-TCBB, volume = "19", number = "1", pages = "209--219", month = jan, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3065986", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:18 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3065986", abstract = "Cryo-electron tomography, combined with subtomogram averaging (STA), can reveal three-dimensional (3D) macromolecule structures in the near-native state from cells and other biological samples. 
In STA, to get a high-resolution 3D view of macromolecule \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Storato:2022:PKD, author = "Davide Storato and Matteo Comin", title = "\pkg{K2Mem}: Discovering Discriminative {$K$}-mers From Sequencing Data for Metagenomic Reads Classification", journal = j-TCBB, volume = "19", number = "1", pages = "220--229", month = jan, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3117406", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:18 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3117406", abstract = "The major problem when analyzing a metagenomic sample is to taxonomically annotate its reads to identify the species they contain. Most of the methods currently available focus on the classification of reads using a set of reference genomes and their $k$-mers. \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Dhar:2022:PTT, author = "Saurav Dhar and Chengchen Zhang and Ion I. Mandoiu and Mukul S. 
Bansal", title = "\pkg{TNet}: Transmission Network Inference Using Within-Host Strain Diversity and its Application to Geographical Tracking of {COVID-19} Spread", journal = j-TCBB, volume = "19", number = "1", pages = "230--242", month = jan, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3096455", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:18 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3096455", abstract = "The inference of disease transmission networks is an important problem in epidemiology. One popular approach for building transmission networks is to reconstruct a phylogenetic tree using sequences from disease strains sampled from infected hosts and \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wang:2022:PED, author = "Jun Wang and Huiling Zhang and Wei Ren and Maozu Guo and Guoxian Yu", title = "\pkg{EpiMC}: Detecting Epistatic Interactions Using Multiple Clusterings", journal = j-TCBB, volume = "19", number = "1", pages = "243--254", month = jan, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3080462", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:18 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3080462", abstract = "Detecting single nucleotide polymorphisms (SNPs) interactions is crucial to identify susceptibility genes associated with complex human diseases in genome-wide association studies. 
Clustering-based approaches are widely used in reducing search space and \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{V:2022:HIN, author = "Sunil Kumar P. V. and Adheeba Thahsin and Manju M. and Gopakumar G.", title = "A Heterogeneous Information Network Model for Long Non-Coding {RNA} Function Prediction", journal = j-TCBB, volume = "19", number = "1", pages = "255--266", month = jan, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3000518", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:18 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3000518", abstract = "Exciting information on the functional roles played by long non-coding RNA (lncRNA) has drawn substantial research attention these days. With the advent of techniques such as RNA-Seq, thousands of lncRNAs are identified in very short time spans. 
However, \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Li:2022:NED, author = "Hansheng Li and JianPing Li and Yuxin Kang and Chunbao Wang and Feihong Liu and Wenli Hui and Qirong Bo and Lei Cui and Jun Feng and Lin Yang", title = "A Novel Encoding and Decoding Calibration Guiding Pathway for Pathological Image Analysis", journal = j-TCBB, volume = "19", number = "1", pages = "267--274", month = jan, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3023467", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:18 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3023467", abstract = "Diagnostic pathology is the foundation and gold standard for identifying carcinomas, and the accurate quantification of pathological images can provide objective clues for pathologists to make more convincing diagnosis. Recently, the encoder-decoder \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Gull:2022:PAS, author = "Sadaf Gull and Fayyaz Minhas", title = "\pkg{AMP$_0$}: Species-Specific Prediction of Anti-microbial Peptides Using Zero and Few Shot Learning", journal = j-TCBB, volume = "19", number = "1", pages = "275--283", month = jan, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.2999399", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:18 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.2999399", abstract = "Evolution of drug-resistant microbial species is one of the major challenges to global health. 
Development of new antimicrobial treatments such as antimicrobial peptides needs to be accelerated to combat this threat. However, the discovery of novel \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Guzzi:2022:EDL, author = "Pietro Hiram Guzzi and Marinka Zitnik", title = "Editorial Deep Learning and Graph Embeddings for Network Biology", journal = j-TCBB, volume = "19", number = "2", pages = "653--654", month = mar, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3110279", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 06:41:04 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3110279", abstract = "This special issue contains a multitude of high-quality manuscripts that cover a broad range of applications supporting the need to discuss and foster these advances in a systematic way, provide practical tools for practitioners, and describe new \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Li:2022:IIM, author = "Jin Li and Jingru Wang and Hao Lv and Zhuoxuan Zhang and Zaixia Wang", title = "{IMCHGAN}: Inductive Matrix Completion With Heterogeneous Graph Attention Networks for Drug-Target Interactions Prediction", journal = j-TCBB, volume = "19", number = "2", pages = "655--665", month = mar, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3088614", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 06:41:04 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3088614", abstract = "Identification of targets among known drugs plays an 
important role in drug repurposing and discovery. Computational approaches for prediction of drug--target interactions (DTIs) are highly desired in comparison to traditional biological experiments \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Pan:2022:IPS, author = "Xiaoyong Pan and Lei Chen and Min Liu and Zhibin Niu and Tao Huang and Yu-Dong Cai", title = "Identifying Protein Subcellular Locations With Embeddings-Based {\tt node2loc}", journal = j-TCBB, volume = "19", number = "2", pages = "666--675", month = mar, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3080386", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 06:41:04 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3080386", abstract = "Identifying protein subcellular locations is an important topic in protein function prediction. Interacting proteins may share similar locations. Thus, it is imperative to infer protein subcellular locations by taking protein-protein interactions (PPIs). \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{KC:2022:PBI, author = "Kishan KC and Rui Li and Feng Cui and Anne R. 
Haake", title = "Predicting Biomedical Interactions With Higher-Order Graph Convolutional Networks", journal = j-TCBB, volume = "19", number = "2", pages = "676--687", month = mar, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3059415", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 06:41:04 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3059415", abstract = "Biomedical interaction networks have incredible potential to be useful in the prediction of biologically meaningful interactions, identification of network biomarkers of disease, and the discovery of putative drug targets. Recently, graph neural networks \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Lei:2022:IMD, author = "Xiujuan Lei and Jiaojiao Tie and Yi Pan", title = "Inferring Metabolite-Disease Association Using Graph Convolutional Networks", journal = j-TCBB, volume = "19", number = "2", pages = "688--698", month = mar, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3065562", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 06:41:04 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3065562", abstract = "As is well known, biological experiments are time-consuming and laborious, so there is absolutely no doubt that developing an effective computational model will help solve these problems. 
Most of computational models rely on the biological similarity and \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Gao:2022:PSC, author = "Jianliang Gao and Tengfei Lyu and Fan Xiong and Jianxin Wang and Weimao Ke and Zhao Li", title = "Predicting the Survival of Cancer Patients With Multimodal Graph Neural Network", journal = j-TCBB, volume = "19", number = "2", pages = "699--709", month = mar, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3083566", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 06:41:04 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3083566", abstract = "In recent years, cancer patients survival prediction holds important significance for worldwide health problems, and has gained many researchers attention in medical information communities. Cancer patients survival prediction can be seen the \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Nguyen:2022:IMG, author = "Giang T. T. Nguyen and Hoa D. 
Vu and Duc-Hau Le", title = "Integrating Molecular Graph Data of Drugs and Multiple -Omic Data of Cell Lines for Drug Response Prediction", journal = j-TCBB, volume = "19", number = "2", pages = "710--717", month = mar, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3096960", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 06:41:04 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3096960", abstract = "Previous studies have either learned drug's features from their string or numeric representations, which are not natural forms of drugs, or only used genomic data of cell lines for the drug response prediction problem. Here, we proposed a deep \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Nguyen:2022:GEF, author = "Tri Minh Nguyen and Thin Nguyen and Thao Minh Le and Truyen Tran", title = "{GEFA}: Early Fusion Approach in Drug-Target Affinity Prediction", journal = j-TCBB, volume = "19", number = "2", pages = "718--728", month = mar, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3094217", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 06:41:04 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3094217", abstract = "Predicting the interaction between a compound and a target is crucial for rapid drug repurposing. Deep learning has been successfully applied in drug-target affinity (DTA) problem. 
However, previous deep learning-based methods ignore modeling the direct \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Manipur:2022:NGE, author = "Ichcha Manipur and Mario Manzo and Ilaria Granata and Maurizio Giordano and Lucia Maddalena and Mario R. Guarracino", title = "{Netpro2vec}: a Graph Embedding Framework for Biomedical Applications", journal = j-TCBB, volume = "19", number = "2", pages = "729--740", month = mar, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3078089", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 06:41:04 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3078089", abstract = "The ever-increasing importance of structured data in different applications, especially in the biomedical field, has driven the need for reducing its complexity through projections into a more manageable space. 
The latest methods for learning features on \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ren:2022:PDM, author = "Yuanfang Ren and Aisharjya Sarkar and Pierangelo Veltri and Ahmet Ay and Alin Dobra and Tamer Kahveci", title = "Pattern Discovery in Multilayer Networks", journal = j-TCBB, volume = "19", number = "2", pages = "741--752", month = mar, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3105001", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 06:41:04 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3105001", abstract = "{\em Motivation\/}: In bioinformatics, complex cellular modeling and behavior simulation to identify significant molecular interactions is considered a relevant problem. Traditional methods model such complex systems using single and binary \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Shen:2022:DLM, author = "Zhen Shen and Qinhu Zhang and Kyungsook Han and De-Shuang Huang", title = "A Deep Learning Model for {RNA}-Protein Binding Preference Prediction Based on Hierarchical {LSTM} and Attention Network", journal = j-TCBB, volume = "19", number = "2", pages = "753--762", month = mar, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3007544", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 06:41:04 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3007544", abstract = "Attention mechanism has the ability to find important information in the sequence. 
The regions of the RNA sequence that can bind to proteins are more important than those that cannot bind to proteins. Neither conventional methods nor deep learning-based \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Gao:2022:NMB, author = "Zhen Gao and Yu-Tian Wang and Qing-Wen Wu and Lei Li and Jian-Cheng Ni and Chun-Hou Zheng", title = "A New Method Based on Matrix Completion and Non-Negative Matrix Factorization for Predicting Disease-Associated {miRNAs}", journal = j-TCBB, volume = "19", number = "2", pages = "763--772", month = mar, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3027444", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 06:41:04 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3027444", abstract = "Numerous studies have shown that microRNAs are associated with the occurrence and development of human diseases. Thus, studying disease-associated miRNAs is significantly valuable to the prevention, diagnosis and treatment of diseases. 
In this paper, we \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Mallick:2022:NGT, author = "Koushik Mallick and Saurav Mallik and Sanghamitra Bandyopadhyay and Sikim Chakraborty", title = "A Novel Graph Topology-Based {GO}-Similarity Measure for Signature Detection From Multi-Omics Data and its Application to Other Problems", journal = j-TCBB, volume = "19", number = "2", pages = "773--785", month = mar, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3020537", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 06:41:04 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3020537", abstract = "Large scale multi-omics data analysis and signature prediction have been a topic of interest in the last two decades. While various traditional clustering/correlation-based methods have been proposed, but the overall prediction is not always satisfactory. 
\ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Huang:2022:NMC, author = "Xin Huang and Zhenqian Liao and Bing Liu and Fengmei Tao and Benzhe Su and Xiaohui Lin", title = "A Novel Method for Constructing Classification Models by Combining Different Biomarker Patterns", journal = j-TCBB, volume = "19", number = "2", pages = "786--794", month = mar, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3022076", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 06:41:04 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3022076", abstract = "Different biomarker patterns, such as those of molecular biomarkers and ratio biomarkers, have their own merits in clinical applications. In this study, a novel machine learning method used in biomedical data analysis for constructing classification \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Feng:2022:III, author = "Shi-Hao Feng and Chun-Qiu Xia and Pei-Dong Zhang and Hong-Bin Shen", title = "{{\em Ab-Initio}} Membrane Protein Amphipathic Helix Structure Prediction Using Deep Neural Networks", journal = j-TCBB, volume = "19", number = "2", pages = "795--805", month = mar, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3029274", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 06:41:04 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3029274", abstract = "Amphipathic helix (AH) features the segregation of polar and nonpolar residues and plays important roles in many membrane-associated biological 
processes through interacting with both the lipid and the soluble phases. Although the AH structure has been \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Giang:2022:FBA, author = "Trinh Van Giang and Tatsuya Akutsu and Kunihiko Hiraishi", title = "An {FVS}-Based Approach to Attractor Detection in Asynchronous Random {Boolean} Networks", journal = j-TCBB, volume = "19", number = "2", pages = "806--818", month = mar, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3028862", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 06:41:04 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3028862", abstract = "Boolean networks (BNs) play a crucial role in modeling and analyzing biological systems. One of the central issues in the analysis of BNs is attractor detection, i.e., identification of all possible attractors. 
This problem becomes more challenging for \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2022:CSD, author = "Yiding Zhang and Lyujie Chen and Shao Li", title = "{CIPHER-SC}: Disease-Gene Association Inference Using Graph Convolution on a Context-Aware Network With Single-Cell Data", journal = j-TCBB, volume = "19", number = "2", pages = "819--829", month = mar, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3017547", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 06:41:04 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3017547", abstract = "Inference of disease-gene associations helps unravel the pathogenesis of diseases and contributes to the treatment. Although many machine learning-based methods have been developed to predict causative genes, accurate association inference remains \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Sarkar:2022:DPR, author = "Aisharjya Sarkar and Prabhat Mishra and Tamer Kahveci", title = "Data Perturbation and Recovery of Time Series Gene Expression Data", journal = j-TCBB, volume = "19", number = "2", pages = "830--842", month = mar, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3058342", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 06:41:04 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3058342", abstract = "Cells, in order to regulate their activities, process transcripts by controlling which genes to transcribe and by what amount. 
The transcription level of genes often change over time. Rate of change of gene transcription varies between genes. It can even \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Bhatt:2022:DKG, author = "Sachin Bhatt and Prithvi Singh and Archana Sharma and Arpita Rai and Ravins Dohare and Shweta Sankhwar and Akash Sharma and Mansoor Ali Syed", title = "Deciphering Key Genes and {miRNAs} Associated With Hepatocellular Carcinoma via Network-Based Approach", journal = j-TCBB, volume = "19", number = "2", pages = "843--853", month = mar, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3016781", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 06:41:04 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3016781", abstract = "Hepatocellular carcinoma (HCC) is a common type of liver cancer and has a high mortality world-widely. The diagnosis, prognoses, and therapeutics are very poor due to the unclear molecular mechanism of progression of the disease. 
To unveil the molecular \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Su:2022:DRL, author = "Yu-Ting Su and Yao Lu and Mei Chen and An-An Liu", title = "Deep Reinforcement Learning-Based Progressive Sequence Saliency Discovery Network for Mitosis Detection In Time-Lapse Phase-Contrast Microscopy Images", journal = j-TCBB, volume = "19", number = "2", pages = "854--865", month = mar, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3019042", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 06:41:04 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3019042", abstract = "Mitosis detection plays an important role in the analysis of cell status and behavior and is therefore widely utilized in many biological research and medical applications. 
In this article, we propose a deep reinforcement learning-based progressive \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Cao:2022:DUA, author = "Ben Cao and Xue Li and Xiaokang Zhang and Bin Wang and Qiang Zhang and Xiaopeng Wei", title = "Designing Uncorrelated Address Constrain for {DNA} Storage by {DMVO} Algorithm", journal = j-TCBB, volume = "19", number = "2", pages = "866--877", month = mar, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3011582", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 06:41:04 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3011582", abstract = "At present, huge amounts of data are being produced every second, a situation that will gradually overwhelm current storage technology. DNA is a storage medium that features high storage density and long-term stability and is now considered to be a \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Guan:2022:DDA, author = "Boxin Guan and Yuhai Zhao and Ying Yin and Yuan Li", title = "Detecting Disease-Associated {SNP--SNP} Interactions Using Progressive Screening Memetic Algorithm", journal = j-TCBB, volume = "19", number = "2", pages = "878--887", month = mar, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3019256", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 06:41:04 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3019256", abstract = "Hundreds of thousands of single nucleotide polymorphisms (SNPs) are currently available for genome-wide 
association study (GWAS). Detecting disease-associated SNP-SNP interactions is considered an important way to capture the underlying genetic causes of \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Bae:2022:DPA, author = "Ho Bae and Seonwoo Min and Hyun-Soo Choi and Sungroh Yoon", title = "{DNA} Privacy: Analyzing Malicious {DNA} Sequences Using Deep Neural Networks", journal = j-TCBB, volume = "19", number = "2", pages = "888--898", month = mar, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3017191", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 06:41:04 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3017191", abstract = "Recent advances in next-generation sequencing technologies have led to the successful insertion of video information into DNA using synthesized oligonucleotides. Several attempts have been made to embed larger data into living organisms. 
This process of \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Kim:2022:DTC, author = "Seonho Kim and Juntae Yoon", title = "Dual Triggered Correspondence Topic ({DTCT}) model for {MeSH} annotation", journal = j-TCBB, volume = "19", number = "2", pages = "899--911", month = mar, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3016355", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 06:41:04 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3016355", abstract = "Accurate Medical Subject Headings (MeSH) annotation is an important issue for researchers in terms of effective information retrieval and knowledge discovery in the biomedical literature. We have developed a powerful dual triggered correspondence topic \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ponte-Fernandez:2022:EEM, author = "Christian Ponte-Fern{\'a}ndez and Jorge Gonz{\'a}lez-Dom{\'\i}nguez and Antonio Carvajal-Rodr{\'\i}guez and Mar{\'\i}a J. Mart{\'\i}n", title = "Evaluation of Existing Methods for High-Order Epistasis Detection", journal = j-TCBB, volume = "19", number = "2", pages = "912--926", month = mar, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3030312", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 06:41:04 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3030312", abstract = "Finding epistatic interactions among loci when expressing a phenotype is a widely employed strategy to understand the genetic architecture of complex traits in GWAS. 
The abundance of methods dedicated to the same purpose, however, makes it increasingly \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Chowdhury:2022:GAD, author = "Tapan Chowdhury and Susanta Chakraborty and Argha Nandan", title = "{GPU} Accelerated Drug Application on Signaling Pathways Containing Multiple Faults Using {Boolean} Networks", journal = j-TCBB, volume = "19", number = "2", pages = "927--939", month = mar, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3014172", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 06:41:04 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3014172", abstract = "Cell growth is governed by the flow of information from growth factors to transcription factors. This flow involves protein-protein interactions known as a signaling pathway, which triggers the cell division. The biological network in the presence of \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Seabolt:2022:FGP, author = "Edward E. Seabolt and Gowri Nayar and Harsha Krishnareddy and Akshay Agarwal and Kristen L. Beck and Ignacio Terrizzano and Eser Kandogan and Mark Kunitomi and Mary Roth and Vandana Mukherjee and James H. 
Kaufman", title = "Functional Genomics Platform, a Cloud-Based Platform for Studying Microbial Life at Scale", journal = j-TCBB, volume = "19", number = "2", pages = "940--952", month = mar, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3021231", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 06:41:04 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3021231", abstract = "The rapid growth in biological sequence data is revolutionizing our understanding of genotypic diversity and challenging conventional approaches to informatics. With the increasing availability of genomic data, traditional bioinformatic tools require \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wang:2022:IGS, author = "Fei Wang and Yulian Ding and Xiujuan Lei and Bo Liao and Fang-Xiang Wu", title = "Identifying Gene Signatures for Cancer Drug Repositioning Based on Sample Clustering", journal = j-TCBB, volume = "19", number = "2", pages = "953--965", month = mar, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3019781", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 06:41:04 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3019781", abstract = "Drug repositioning is an important approach for drug discovery. Computational drug repositioning approaches typically use a gene signature to represent a particular disease and connect the gene signature with drug perturbation profiles. 
Although disease \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Alazmi:2022:IIB, author = "Meshari Alazmi and Olaa Motwalli", title = "Immuno-Informatics Based Peptides: an Approach for Vaccine Development Against Outer Membrane Proteins of \bioname{Pseudomonas} Genus", journal = j-TCBB, volume = "19", number = "2", pages = "966--973", month = mar, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3032651", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 06:41:04 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3032651", abstract = "Pseudomonas genus is among the top nosocomial pathogens known to date. Being highly opportunistic, members of pseudomonas genus are most commonly connected with nosocomial infections of urinary tract and ventilator-associated pneumonia. \ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Newaz:2022:IDA, author = "Khalique Newaz and Tijana Milenkovi{\'c}", title = "Inference of a Dynamic Aging-related Biological Subnetwork via Network Propagation", journal = j-TCBB, volume = "19", number = "2", pages = "974--988", month = mar, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3022767", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Apr 15 06:41:04 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3022767", abstract = "Gene expression (GE) data capture valuable condition-specific information (`condition'; can mean a biological process, disease stage, age, patient, etc.) However, GE analyses ignore physical interactions between gene products, i.e., proteins. 
\ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Tsui:2022:E, author = "Stephen Kwok-Wing Tsui", title = "Editorial", journal = j-TCBB, volume = "19", number = "3", pages = "1255--1256", month = may, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3155845", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:56 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3155845", abstract = "This special section of IEEE/ACM Transactions on Computational Biology and Bioinformatics (TCBB) is a collection of papers presented at the 18th Asia Pacific Bioinformatics Conference (APBC2020), which was a virtual conference held in Seoul, Korea, from \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2022:PPP, author = "Jiongmin Zhang and Man Zhu and Ying Qian", title = "{protein2vec}: Predicting Protein--Protein Interactions Based on {LSTM}", journal = j-TCBB, volume = "19", number = "3", pages = "1257--1266", month = may, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3003941", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:56 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3003941", abstract = "The semantic similarity of gene ontology (GO) terms is widely used to predict protein-protein interactions (PPIs). 
The traditional semantic similarity measures are based mainly on manually crafted features, which may ignore some important hidden \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Park:2022:NAD, author = "Byungkyu Park and Wook Lee and Kyungsook Han", title = "A New Approach to Deriving Prognostic Gene Pairs From Cancer Patient-Specific Gene Correlation Networks", journal = j-TCBB, volume = "19", number = "3", pages = "1267--1276", month = may, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3017209", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:56 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3017209", abstract = "Many of the known prognostic gene signatures for cancer are individual genes or combination of genes, found by the analysis of microarray data. However, many of the known cancer signatures are less predictive than random gene expression signatures, and \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Li:2022:DDC, author = "Yanbo Li and Yu Lin", title = "{DCHap}: a Divide-and-Conquer Haplotype Phasing Algorithm for Third-Generation Sequences", journal = j-TCBB, volume = "19", number = "3", pages = "1277--1284", month = may, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3005673", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:56 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3005673", abstract = "The development of DNA sequencing technologies makes it possible to obtain reads originated from both copies of a chromosome (two parental chromosomes, or haplotypes) of a single individual. Reconstruction of both haplotypes (i.e., haplotype phasing) \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Lin:2022:CNA, author = "Chengchuang Lin and Gansen Zhao and Zhirong Yang and Aihua Yin and Xinming Wang and Li Guo and Hanbiao Chen and Zhaohui Ma and Lei Zhao and Haoyu Luo and Tianxing Wang and Bichao Ding and Xiongwen Pang and Qiren Chen", title = "{CIR-Net}: Automatic Classification of Human Chromosome Based on {Inception-ResNet} Architecture", journal = j-TCBB, volume = "19", number = "3", pages = "1285--1293", month = may, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3003445", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:56 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3003445", abstract = "Background: In medicine, karyotyping chromosomes is important for medical diagnostics, drug development, and biomedical research. Unfortunately, chromosome karyotyping is usually done by skilled cytologists manually, which requires \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Sang:2022:SEB, author = "Shengtian Sang and Xiaoxia Liu and Xiaoyu Chen and Di Zhao", title = "A Scalable Embedding Based Neural Network Method for Discovering Knowledge From Biomedical Literature", journal = j-TCBB, volume = "19", number = "3", pages = "1294--1301", month = may, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3003947", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:56 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3003947", abstract = "Nowadays, the amount of biomedical literatures is growing at an explosive speed, and much useful knowledge is yet undiscovered in the literature. Classical information retrieval techniques allow to access explicit information from a given collection of \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Kim:2022:HSC, author = "Sun Ah Kim and Nayeon Kang and Taesung Park", title = "Hierarchical Structured Component Analysis for Microbiome Data Using Taxonomy Assignments", journal = j-TCBB, volume = "19", number = "3", pages = "1302--1312", month = may, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3039326", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:56 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3039326", abstract = "The recent advent of high-throughput sequencing technology has enabled us to study the associations between human microbiome and diseases. 
The DNA sequences of microbiome samples are clustered as operational taxonomic units (OTUs) according to their \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yu:2022:EEP, author = "Ken Hung-On Yu and Xiunan Fang and Haobin Yao and Bond Ng and Tak Kwan Leung and Ling-Ling Wang and Chi Ho Lin and Agnes Sze Wah Chan and Wai Keung Leung and Suet Yi Leung and Joshua Wing Kei Ho", title = "Evaluation of Experimental Protocols for Shotgun Whole-Genome Metagenomic Discovery of Antibiotic Resistance Genes", journal = j-TCBB, volume = "19", number = "3", pages = "1313--1321", month = may, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3004063", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:56 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3004063", abstract = "Shotgun metagenomics has enabled the discovery of antibiotic resistance genes (ARGs). Although there have been numerous studies benchmarking the bioinformatics methods for shotgun metagenomic data analysis, there has not yet been a study that \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ma:2022:SBS, author = "Yingjun Ma and Tingting He and Yuting Tan and Xingpeng Jiang", title = "{Seq-BEL}: Sequence-Based Ensemble Learning for Predicting Virus-Human Protein--Protein Interaction", journal = j-TCBB, volume = "19", number = "3", pages = "1322--1333", month = may, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3008157", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:56 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3008157", abstract = "Infectious diseases are currently the most important and widespread health problem, and identifying viral infection mechanisms is critical for controlling diseases caused by highly infectious viruses. Because of the lack of non-interactive protein pairs \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Huang:2022:E, author = "Xiuzhen Huang and Yu Zhang and Xuan Guo", title = "Editorial", journal = j-TCBB, volume = "19", number = "3", pages = "1334--1335", month = may, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3131688", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:56 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3131688", abstract = "This special section gives the opportunity to know recent advances in the application of intelligent optimization algorithms in genomics and precision medicine. 
Precision medicine is designed to optimize the pathway for diagnosis, therapeutic intervention, \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Abdollahi:2022:DAD, author = "Sina Abdollahi and Peng-Chan Lin and Jung-Hsien Chiang", title = "{DiaDeL}: an Accurate Deep Learning-Based Model With Mutational Signatures for Predicting Metastasis Stage and Cancer Types", journal = j-TCBB, volume = "19", number = "3", pages = "1336--1343", month = may, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3115504", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:56 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3115504", abstract = "Mutational signatures help identify cancer-associated genes that are being involved in tumorigenesis pathways. Hence, these pathways guide precision medicine approaches to find appropriate drugs and treatments. The pattern of mutations varies in different \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Adnan:2022:CER, author = "Nahim Adnan and Maryam Zand and Tim H. M. 
Huang and Jianhua Ruan", title = "Construction and Evaluation of Robust Interpretation Models for Breast Cancer Metastasis Prediction", journal = j-TCBB, volume = "19", number = "3", pages = "1344--1353", month = may, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3120673", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:56 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3120673", abstract = "Interpretability of machine learning (ML) models represents the extent to which a model's decision-making process can be understood by model developers and/or end users. Transcriptomics-based cancer prognosis models, for example, while achieving \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Bhadra:2022:UFS, author = "Tapas Bhadra and Saurav Mallik and Amir Sohel and Zhongming Zhao", title = "Unsupervised Feature Selection Using an Integrated Strategy of Hierarchical Clustering With Singular Value Decomposition: an Integrative Biomarker Discovery Method With Application to Acute Myeloid Leukemia", journal = j-TCBB, volume = "19", number = "3", pages = "1354--1364", month = may, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3110989", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:56 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3110989", abstract = "In this article, we propose a novel unsupervised feature selection method by combining hierarchical feature clustering with singular value decomposition (SVD). 
The proposed algorithm first generates several feature clusters by adopting the hierarchical \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Bui:2022:HBB, author = "Lien A. Bui and Dacosta Yeboah and Louis Steinmeister and Sima Azizi and Daniel B. Hier and Donald C. Wunsch and Gayla R. Olbricht and Tayo Obafemi-Ajayi", title = "Heterogeneity in Blood Biomarker Trajectories After Mild {TBI} Revealed by Unsupervised Learning", journal = j-TCBB, volume = "19", number = "3", pages = "1365--1378", month = may, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3091972", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:56 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3091972", abstract = "Concussions, also known as mild traumatic brain injury (mTBI), are a growing health challenge. Approximately four million concussions are diagnosed annually in the United States. Concussion is a heterogeneous disorder in causation, symptoms, and outcome \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Manduchi:2022:GAC, author = "Elisabetta Manduchi and Trang T. Le and Weixuan Fu and Jason H. 
Moore", title = "Genetic Analysis of Coronary Artery Disease Using Tree-Based Automated Machine Learning Informed By Biology-Based Feature Selection", journal = j-TCBB, volume = "19", number = "3", pages = "1379--1386", month = may, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3099068", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:56 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3099068", abstract = "Machine Learning (ML) approaches are increasingly being used in biomedical applications. Important challenges of ML include choosing the right algorithm and tuning the parameters for optimal performance. Automated ML (AutoML) methods, such as Tree-based \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Causey:2022:EUN, author = "Jason Causey and Jonathan Stubblefield and Jake Qualls and Jennifer Fowler and Lingrui Cai and Karl Walker and Yuanfang Guan and Xiuzhen Huang", title = "An Ensemble of {U-Net} Models for Kidney Tumor Segmentation With {CT} Images", journal = j-TCBB, volume = "19", number = "3", pages = "1387--1392", month = may, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3085608", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:56 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3085608", abstract = "We present here the Arkansas AI-Campus solution method for the 2019 Kidney Tumor Segmentation Challenge (KiTS19). Our Arkansas AI-Campus team participated the KiTS19 Challenge for four months, from March to July of 2019. 
This paper provides a summary of \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Li:2022:APD, author = "Lechuan Li and Chonghao Zhang and Shiyu Liu and Hannah Guan and Yu Zhang", title = "Age Prediction by {DNA} Methylation in Neural Networks", journal = j-TCBB, volume = "19", number = "3", pages = "1393--1402", month = may, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3084596", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:56 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3084596", abstract = "Aging is traditionally thought to be caused by complex and interacting factors such as DNA methylation. The traditional formula of DNA methylation aging is based on linear models and little work has explored the effectiveness of neural \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Madhumita:2022:FWA, author = "Madhumita and Sushmita Paul", title = "A Feature Weighting-Assisted Approach for Cancer Subtypes Identification From Paired Expression Profiles", journal = j-TCBB, volume = "19", number = "3", pages = "1403--1414", month = may, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3041723", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:56 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3041723", abstract = "Identification of cancer subtypes is critically important for understanding the heterogeneity present in tumors. Projects like The Cancer Genome Atlas (TCGA), have made available the data-sets containing expression profiles of multiple types of biomarkers \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhou:2022:NEM, author = "Lewei Zhou and Yucong Tang and Guiying Yan", title = "A New Estimation Method for the Biological Interaction Predicting Problems", journal = j-TCBB, volume = "19", number = "3", pages = "1415--1423", month = may, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3049642", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:56 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3049642", abstract = "For the past decades, computational methods have been developed to predict various interactions in biological problems. 
Usually these methods treated the predicting problems as semi-supervised problem or positive-unlabeled (PU) learning problem. \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zou:2022:NAL, author = "Chengye Zou and Xiaopeng Wei and Qiang Zhang and Changjun Zhou", title = "A Novel Adaptive Linear Neuron Based on {DNA} Strand Displacement Reaction Network", journal = j-TCBB, volume = "19", number = "3", pages = "1424--1434", month = may, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3045567", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:56 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3045567", abstract = "Analog DNA strand displacement circuits can be used to build artificial neural network due to the continuity of dynamic behavior. In this study, DNA implementations of novel catalysis, novel degradation and adjustment reaction modules are designed and \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Jiang:2022:RAB, author = "Hao Jiang and Fei Zhan and Congtao Wang and Jianfeng Qiu and Yansen Su and Chunhou Zheng and Xingyi Zhang and Xiangxiang Zeng", title = "A Robust Algorithm Based on Link Label Propagation for Identifying Functional Modules From Protein--Protein Interaction Networks", journal = j-TCBB, volume = "19", number = "3", pages = "1435--1448", month = may, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3038815", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:56 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3038815", abstract = "Identifying functional modules in protein-protein interaction (PPI) networks elucidates cellular organization and mechanism. Various methods have been proposed to identify the functional modules in PPI networks, but most of these methods do not consider \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Bankapur:2022:EML, author = "Sanjay Bankapur and Nagamma Patil", title = "An Effective Multi-Label Protein Sub-Chloroplast Localization Prediction by Skipped-Grams of Evolutionary Profiles Using Deep Neural Network", journal = j-TCBB, volume = "19", number = "3", pages = "1449--1458", month = may, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3037465", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:56 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3037465", abstract = "Chloroplast is one of the most classic organelles in algae and plant cells. Identifying the locations of chloroplast proteins in the chloroplast organelle is an important as well as a challenging task in deciphering their functions. Biological-based \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Liu:2022:EHF, author = "Liangliang Liu and Shaojie Tang and Fang-Xiang Wu and Yu-Ping Wang and Jianxin Wang", title = "An Ensemble Hybrid Feature Selection Method for Neuropsychiatric Disorder Classification", journal = j-TCBB, volume = "19", number = "3", pages = "1459--1471", month = may, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3053181", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:56 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3053181", abstract = "Magnetic resonance imagings (MRIs) are providing increased access to neuropsychiatric disorders that can be made available for advanced data analysis. However, the single type of data limits the ability of psychiatrists to distinguish the subclasses of \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Pouryahya:2022:ANI, author = "Maryam Pouryahya and Jung Hun Oh and Pedram Javanmard and James C. Mathews and Zehor Belkhatir and Joseph O. Deasy and Allen R. 
Tannenbaum", title = "{aWCluster}: a Novel Integrative Network-Based Clustering of Multiomics for Subtype Analysis of Cancer Data", journal = j-TCBB, volume = "19", number = "3", pages = "1472--1483", month = may, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3039511", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:56 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3039511", abstract = "The remarkable growth of multi-platform genomic profiles has led to the challenge of multiomics data integration. In this study, we present a novel network-based multiomics clustering founded on the Wasserstein distance from optimal mass transport. This \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2022:BIS, author = "Hu Zhang and Jingsong Chen and Tianhai Tian", title = "{Bayesian} Inference of Stochastic Dynamic Models Using Early-Rejection Methods Based on Sequential Stochastic Simulations", journal = j-TCBB, volume = "19", number = "3", pages = "1484--1494", month = may, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3039490", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:56 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3039490", abstract = "Stochastic modelling is an important method to investigate the functions of noise in a wide range of biological systems. However, the parameter inference for stochastic models is still a challenging problem partially due to the large computing time \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zuanetti:2022:BME, author = "Daiane Aparecida Zuanetti and Luis Aparecido Milan", title = "{Bayesian} Modeling for Epistasis Analysis Using Data-Driven Reversible Jump", journal = j-TCBB, volume = "19", number = "3", pages = "1495--1506", month = may, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3043857", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:56 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3043857", abstract = "We propose a procedure for modeling a phenotype using QTLs which estimate the additive and dominance effects of genotypes and epistasis. The estimation of the model is implemented through a Bayesian approach which uses the data-driven reversible jump \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ascension:2022:BPM, author = "Alex M. Ascensi{\'o}n and Marcos J. Ara{\'u}zo-Bravo", title = "{BigMPI4py}: {Python} Module for Parallelization of Big Data Objects Discloses Germ Layer Specific {DNA} Demethylation Motifs", journal = j-TCBB, volume = "19", number = "3", pages = "1507--1522", month = may, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3043979", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:56 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/python.bib; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3043979", abstract = "Parallelization in Python integrates Message Passing Interface via the mpi4py module. 
Since mpi4py does not support parallelization of objects greater than $ 2^{31} $ bytes, we developed BigMPI4py, a Python module that wraps mpi4py, supporting object sizes beyond this boundary. BigMPI4py automatically determines the optimal object distribution strategy, and uses vectorized methods, achieving higher parallelization efficiency. BigMPI4py facilitates the implementation of Python for Big Data applications in multicore workstations and High Performance Computer systems. We use BigMPI4py to speed-up the search for germ line specific de novo DNA methylated/unmethylated motifs from the 59 whole genome bisulfite sequencing DNA methylation samples from 27 human tissues of the ENCODE project. We developed a parallel implementation of the Kruskal--Wallis test to find CpGs with differential methylation across germ layers. The parallel evaluation of the significance of 55 million CpG achieved a 22x speedup with 25 cores allowing us an efficient identification of a set of hypermethylated genes in ectoderm and mesoderm-related tissues, and another set in endoderm-related tissues and finally, the discovery of germ layer specific DNA demethylation motifs. Our results point out that DNA methylation signal provide a higher degree of information for the demethylated state than for the methylated state. BigMPI4py is available at https://www.arauzolab.org/tools/bigmpi4py and https://gitlab.com/alexmascension/bigmpi4py and the Jupyter Notebook with WGBS analysis at https://gitlab.com/alexmascension/wgbs-analysis", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wang:2022:CRR, author = "Conghua Wang and Haihong Liu and Zhonghua Miao and Jin Zhou", title = "Circadian Rhythm Regulated by Tumor Suppressor p53 and Time Delay in Unstressed Cells", journal = j-TCBB, volume = "19", number = "3", pages = "1523--1530", month = may, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3040368", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:56 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3040368", abstract = "Circadian function and p53 network are interconnected on the molecular level, but the dynamics induced by the interaction between the circadian factor Per2 and the tumor suppressor p53 remains poorly understood. Here, we constructed an integrative model \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Khan:2022:CEE, author = "Abhinandan Khan and Goutam Saha and Rajat Kumar Pal", title = "Controlling the Effects of External Perturbations on a Gene Regulatory Network Using Proportional-Integral-Derivative Controller", journal = j-TCBB, volume = "19", number = "3", pages = "1531--1544", month = may, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3039038", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:56 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3039038", abstract = "Gene regulatory networks are biologically robust, which imparts resilience to living systems against most external perturbations affecting them. However, there is a limit to this and disturbances beyond this limit can impart unwanted signalling on one or \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Abbaszadeh:2022:DDK, author = "Omid Abbaszadeh and Ali Azarpeyvand and Alireza Khanteymoori and Abbas Bahari", title = "Data-Driven and Knowledge-Based Algorithms for Gene Network Reconstruction on High-Dimensional Data", journal = j-TCBB, volume = "19", number = "3", pages = "1545--1557", month = may, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2020.3034861", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:56 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2020.3034861", abstract = "Previous efforts in gene network reconstruction have mainly focused on data-driven modeling, with little attention paid to knowledge-based approaches. Leveraging prior knowledge, however, is a promising paradigm that has been gaining momentum in network \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ma:2022:GES, author = "Jian Ma", title = "Guest Editorial for Selected Papers From {ACM-BCB 2019}", journal = j-TCBB, volume = "19", number = "4", pages = "1919", month = jul, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3140625", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3140625", abstract = "The papers in this special issue were presented at the ACM Conference on Bioinformatics, Computational Biology, and Health Informatics (ACM-BCB) that was held in Niagara Falls, NY, on September 7--10, 2019. 
The conference continued the main focus of \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Guo:2022:SNU, author = "Yue Guo and Oleh Krupa and Jason Stein and Guorong Wu and Ashok Krishnamurthy", title = "{SAU-Net}: a Unified Network for Cell Counting in {$2$D} and {$3$D} Microscopy Images", journal = j-TCBB, volume = "19", number = "4", pages = "1920--1932", month = jul, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3089608", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3089608", abstract = "Image-based cell counting is a fundamental yet challenging task with wide applications in biological research. In this paper, we propose a novel unified deep network framework designed to solve this problem for various cell types in both 2D and 3D images. \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Lazarsfeld:2022:MVC, author = "John Lazarsfeld and Jonathan Rodr{\'\i}guez and Mert Erden and Yuelin Liu and Lenore J. 
Cowen", title = "Majority Vote Cascading: a Semi-Supervised Framework for Improving Protein Function Prediction", journal = j-TCBB, volume = "19", number = "4", pages = "1933--1945", month = jul, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3059812", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3059812", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Barshai:2022:GCN, author = "Mira Barshai and Alice Aubert and Yaron Orenstein", title = "{G4detector}: Convolutional Neural Network to Predict {DNA} {G}-Quadruplexes", journal = j-TCBB, volume = "19", number = "4", pages = "1946--1955", month = jul, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3073595", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3073595", abstract = "G-quadruplexes (G4s) are nucleic acid secondary structures that form within guanine-rich DNA or RNA sequences. G4 formation can affect chromatin architecture and gene regulation, and has been associated with genomic instability, genetic diseases, and \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Pinoli:2022:PDS, author = "Pietro Pinoli and Gaia Ceddia and Stefano Ceri and Marco Masseroli", title = "Predicting Drug Synergism by Means of Non-Negative Matrix Tri-Factorization", journal = j-TCBB, volume = "19", number = "4", pages = "1956--1967", month = jul, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3091814", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3091814", abstract = "Traditional drug experiments to find synergistic drug pairs are time-consuming and expensive due to the numerous possible combinations of drugs that have to be examined. Thus, computational methods that can give suggestions for synergistic drug \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Qiu:2022:CRC, author = "Yang Qiu and Yang Zhang and Yifan Deng and Shichao Liu and Wen Zhang", title = "A Comprehensive Review of Computational Methods For Drug-Drug Interaction Detection", journal = j-TCBB, volume = "19", number = "4", pages = "1968--1985", month = jul, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3081268", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3081268", abstract = "The detection of drug-drug interactions (DDIs) is a crucial task for drug safety surveillance, which provides effective and safe co-prescriptions of multiple drugs. 
Since laboratory researches are often complicated, costly and time-consuming, it's \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhuang:2022:DEP, author = "Yuanying Zhuang and Xiangrong Liu and Yue Zhong and Longxin Wu", title = "A Deep Ensemble Predictor for Identifying Anti-Hypertensive Peptides Using Pretrained Protein Embedding", journal = j-TCBB, volume = "19", number = "4", pages = "1986--1992", month = jul, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3068381", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3068381", abstract = "Hypertension (HT), or high blood pressure is one of the most common and main causes in cardiovascular diseases, which is also related to a series of detrimental diseases in humans. Deficiencies in effective treatment in HT are often associated with a \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Li:2022:DRA, author = "Xingyi Li and Ju Xiang and Fang-Xiang Wu and Min Li", title = "A Dual Ranking Algorithm Based on the Multiplex Network for Heterogeneous Complex Disease Analysis", journal = j-TCBB, volume = "19", number = "4", pages = "1993--2002", month = jul, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3059046", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3059046", abstract = "Identifying biomarkers of heterogeneous complex diseases has always been one of the focuses in medical research. In previous studies, the powerful network propagation methods have been applied to finding marker genes related to specific diseases, but \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Jia:2022:MCA, author = "Xibin Jia and Zheng Sun and Qing Mi and Zhenghan Yang and Dawei Yang", title = "A Multimodality-Contribution-Aware {TripNet} for Histologic Grading of Hepatocellular Carcinoma", journal = j-TCBB, volume = "19", number = "4", pages = "2003--2016", month = jul, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3079216", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3079216", abstract = "Hepatocellular carcinoma (HCC) is a type of primary liver malignant tumor with a high recurrence rate and poor prognosis even undergoing resection or transplantation. Accurate discrimination of the histologic grades of HCC plays a critical role in the \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wang:2022:NBG, author = "Shiming Wang and Jie Li and Yadong Wang and Liran Juan", title = "A Neighborhood-Based Global Network Model to Predict Drug-Target Interactions", journal = j-TCBB, volume = "19", number = "4", pages = "2017--2025", month = jul, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3064614", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3064614", abstract = "The detection of drug-target interactions (DTIs) plays an important role in drug discovery and development, making DTI prediction urgent to be solved. 
Existing computational methods usually utilize drug similarity, target similarity and DTI information to \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ding:2022:NRG, author = "Xiaojian Ding and Fan Yang and Yaoyi Zhong and Jie Cao", title = "A Novel Recursive Gene Selection Method Based on Least Square Kernel Extreme Learning Machine", journal = j-TCBB, volume = "19", number = "4", pages = "2026--2038", month = jul, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3068846", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3068846", abstract = "This paper presents a recursive feature elimination (RFE) mechanism to select the most informative genes with a least square kernel extreme learning machine (LSKELM) classifier. Describing the generalization ability of LSKELM in a way that is related to \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Chaudhuri:2022:PMA, author = "Anik Chaudhuri and Anwoy Kumar Mohanty and Manoranjan Satpathy", title = "A Parallelizable Model for Analyzing Cancer Tissue Heterogeneity", journal = j-TCBB, volume = "19", number = "4", pages = "2039--2048", month = jul, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3085894", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3085894", abstract = "In a cancer study, the heterogeneous nature of a cell population creates a lot of challenges. Efficient determination of the compositional breakup of a cell population, from gene expression measurements, is critical to the success in a cancer study. This \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ji:2022:SSL, author = "Cunmei Ji and Yutian Wang and Zhen Gao and Lei Li and Jiancheng Ni and Chunhou Zheng", title = "A Semi-Supervised Learning Method for {MiRNA-Disease} Association Prediction Based on Variational Autoencoder", journal = j-TCBB, volume = "19", number = "4", pages = "2049--2059", month = jul, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3067338", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3067338", abstract = "MicroRNAs (miRNAs) are a class of non-coding RNAs that play critical role in many biological processes, such as cell growth, development, differentiation and aging. Increasing studies have revealed that miRNAs are closely involved in many human diseases. \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Mondol:2022:AAA, author = "Raktim Kumar Mondol and Nhan Duy Truong and Mohammad Reza and Samuel Ippolito and Esmaeil Ebrahimie and Omid Kavehei", title = "{AFExNet}: an Adversarial Autoencoder for Differentiating Breast Cancer Sub-Types and Extracting Biologically Relevant Genes", journal = j-TCBB, volume = "19", number = "4", pages = "2060--2070", month = jul, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3066086", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3066086", abstract = "Technological advancements in high-throughput genomics enable the generation of complex and large data sets that can be used for classification, clustering, and bio-marker identification. Modern deep learning algorithms provide us with the opportunity of \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ma:2022:AHS, author = "Jingjing Ma and Haitao Jiang and Daming Zhu and Runmin Yang", title = "Algorithms and Hardness for Scaffold Filling to Maximize Increased Duo-Preservations", journal = j-TCBB, volume = "19", number = "4", pages = "2071--2079", month = jul, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3083896", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3083896", abstract = "Scaffold filling is a critical step in DNA assembly. 
Its basic task is to fill the missing genes (fragments) into an incomplete genome (scaffold) to make it similar to the reference genome. There have been a lot of work under distinct measurements in the \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Matroud:2022:AAA, author = "Atheer Matroud and Christopher Tuffley and Michael Hendy", title = "An Asymmetric Alignment Algorithm for Estimating Ancestor-Descendant Edit Distance for Tandem Repeats", journal = j-TCBB, volume = "19", number = "4", pages = "2080--2091", month = jul, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3059239", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3059239", abstract = "Tandem repeats are repetitive structures present in some DNA sequences, consisting of many repeated copies of a single motif. They can serve as important markers for phylogenetic and population genetic studies, due to the high polymorphism in the number \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhao:2022:BDD, author = "Qichang Zhao and Mengyun Yang and Zhongjian Cheng and Yaohang Li and Jianxin Wang", title = "Biomedical Data and Deep Learning Computational Models for Predicting Compound-Protein Relations", journal = j-TCBB, volume = "19", number = "4", pages = "2092--2110", month = jul, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3069040", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3069040", abstract = "The identification of compound-protein relations (CPRs), which includes compound-protein interactions (CPIs) and compound-protein affinities (CPAs), is critical to drug development. A common method for compound-protein relation identification is the use \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ghosh:2022:BSS, author = "Debraj Ghosh and Rajat K. 
De", title = "Block Search Stochastic Simulation Algorithm ({{\em BlSSSA\/}}): a Fast Stochastic Simulation Algorithm for Modeling Large Biochemical Networks", journal = j-TCBB, volume = "19", number = "4", pages = "2111--2123", month = jul, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3070123", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3070123", abstract = "Stochastic simulation algorithms are extensively used for exploring stochastic behavior of biochemical pathways/networks. Computational cost of these algorithms is high in simulating real biochemical systems due to their large size, complex structure and \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Sanyal:2022:CTC, author = "Ritabrata Sanyal and Devroop Kar and Ram Sarkar", title = "Carcinoma Type Classification From High-Resolution Breast Microscopy Images Using a Hybrid Ensemble of Deep Convolutional Features and Gradient Boosting Trees Classifiers", journal = j-TCBB, volume = "19", number = "4", pages = "2124--2136", month = jul, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3071022", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3071022", abstract = "Breast cancer is one of the main causes behind cancer deaths in women worldwide. 
Yet, owing to the complexity of the histopathological images and the arduousness of manual analysis task, the entire diagnosis process becomes time-consuming and the results \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Dey:2022:CGA, author = "Lopamudra Dey and Anirban Mukhopadhyay", title = "Compact Genetic Algorithm-Based Feature Selection for Sequence-Based Prediction of Dengue--Human Protein Interactions", journal = j-TCBB, volume = "19", number = "4", pages = "2137--2148", month = jul, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3066597", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3066597", abstract = "Dengue Virus (DENV) infection is one of the rapidly spreading mosquito-borne viral infections in humans. Every year, around 50 million people get affected by DENV infection, resulting in 20,000 deaths. Despite the recent experiments focusing on dengue \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Feng:2022:CMI, author = "Changli Feng and Jin Wu and Haiyan Wei and Lei Xu and Quan Zou", title = "{CRCF}: a Method of Identifying Secretory Proteins of Malaria Parasites", journal = j-TCBB, volume = "19", number = "4", pages = "2149--2157", month = jul, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3085589", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3085589", abstract = "Malaria is a mosquito-borne disease that results in millions of cases and deaths annually. The development of a fast computational method that identifies secretory proteins of the malaria parasite is important for research on antimalarial drugs and \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yang:2022:DDL, author = "Cheng-Hong Yang and Kuo-Chuan Wu and Li-Yeh Chuang and Hsueh-Wei Chang", title = "{DeepBarcoding}: Deep Learning for Species Classification Using {DNA} Barcoding", journal = j-TCBB, volume = "19", number = "4", pages = "2158--2165", month = jul, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3056570", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3056570", abstract = "DNA barcodes with short sequence fragments are used for species identification. Because of advances in sequencing technologies, DNA barcodes have gradually been emphasized. 
DNA sequences from different organisms are easily and rapidly acquired. Therefore, \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yu:2022:DPI, author = "Guoxian Yu and Yeqian Yang and Yangyang Yan and Maozu Guo and Xiangliang Zhang and Jun Wang", title = "{DeepIDA}: Predicting Isoform-Disease Associations by Data Fusion and Deep Neural Networks", journal = j-TCBB, volume = "19", number = "4", pages = "2166--2176", month = jul, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3058801", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3058801", abstract = "Alternative splicing produces different isoforms from the same gene locus, it is an important mechanism for regulating gene expression and proteome diversity. Although the prediction of gene(ncRNA)-disease associations has been extensively studied, few \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wang:2022:DPI, author = "Jun Wang and Long Zhang and An Zeng and Dawen Xia and Jiantao Yu and Guoxian Yu", title = "{DeepIII}: Predicting Isoform-Isoform Interactions by Deep Neural Networks and Data Fusion", journal = j-TCBB, volume = "19", number = "4", pages = "2177--2187", month = jul, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3068875", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3068875", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Liu:2022:DIR, author = "Zhonghao Liu and Jing Jin and Yuxin Cui and Zheng Xiong and Alireza Nasiri and Yong Zhao and Jianjun Hu", title = "{DeepSeqPanII}: an Interpretable Recurrent Neural Network Model With Attention Mechanism for Peptide-{HLA Class II} Binding Prediction", journal = j-TCBB, volume = "19", number = "4", pages = "2188--2196", month = jul, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3074927", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3074927", abstract = "Human leukocyte antigen (HLA) complex molecules play an essential role in immune interactions by presenting peptides on the cell surface to T cells. With significant deep learning progress, a series of neural network-based models have been proposed and \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. 
Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Paltun:2022:DBD, author = "Bet{\"u}l G{\"u}ven{\c{c}} Paltun and Samuel Kaski and Hiroshi Mamitsuka", title = "{DIVERSE}: {Bayesian Data IntegratiVE} Learning for Precise Drug {ResponSE} Prediction", journal = j-TCBB, volume = "19", number = "4", pages = "2197--2207", month = jul, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3065535", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3065535", abstract = "Detecting predictive biomarkers from multi-omics data is important for precision medicine, to improve diagnostics of complex diseases and for better treatments. This needs substantial experimental efforts that are made difficult by the heterogeneity of \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Cheng:2022:DTI, author = "Zhongjian Cheng and Cheng Yan and Fang-Xiang Wu and Jianxin Wang", title = "Drug-Target Interaction Prediction Using Multi-Head Self-Attention and Graph Attention Network", journal = j-TCBB, volume = "19", number = "4", pages = "2208--2218", month = jul, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3077905", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3077905", abstract = "Identifying drug-target interactions (DTIs) is an important step in the process of new drug discovery and drug repositioning. 
Accurate predictions for DTIs can improve the efficiency in the drug discovery and development. Although rapid advances in deep \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Li:2022:DMD, author = "Dongyuan Li and Shuyao Zhang and Xiaoke Ma", title = "Dynamic Module Detection in Temporal Attributed Networks of Cancers", journal = j-TCBB, volume = "19", number = "4", pages = "2219--2230", month = jul, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3069441", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3069441", abstract = "Tracking the dynamic modules (modules change over time) during cancer progression is essential for studying cancer pathogenesis, diagnosis, and therapy. However, current algorithms only focus on detecting dynamic modules from temporal cancer networks \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Liu:2022:ECD, author = "Chuang Liu and Yao Dai and Keping Yu and Zi-Ke Zhang", title = "Enhancing Cancer Driver Gene Prediction by Protein--Protein Interaction Network", journal = j-TCBB, volume = "19", number = "4", pages = "2231--2240", month = jul, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3063532", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3063532", abstract = "With the advances in gene sequencing technologies, millions of somatic mutations have been reported in the past decades, but mining cancer driver genes with oncogenic mutations from these data remains a critical and challenging area of research. In this \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wang:2022:FSD, author = "Yan Wang and Lei Zhang and Xin Shu and Yangqin Feng and Zhang Yi and Qing Lv", title = "Feature-Sensitive Deep Convolutional Neural Network for Multi-Instance Breast Cancer Detection", journal = j-TCBB, volume = "19", number = "4", pages = "2241--2251", month = jul, year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3060183", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:00:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3060183", abstract = "To obtain a well-performed computer-aided detection model for detecting breast cancer, it is usually needed to design an effective and efficient algorithm and a well-labeled dataset to train it. In this paper, first, a multi-instance mammography clinic \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yan:2022:GESa, author = "Da Yan and Hong Qin and Hsiang-Yun Wu and Jake Y. Chen", title = "Guest Editorial for Selected Papers From {BIOKDD 2020}", journal = j-TCBB, volume = "19", number = "5", pages = "2545--2546", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3176912", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:01 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3176912", abstract = "THE 19th International Workshop on Data Mining in Bioinformatics (BIOKDD 2020) was held virtually on August 24, 2020 due to the COVID-19 pandemic. 
BIOKDD 2020 featured the special theme of ``Battling COVID-19'' which particularly welcomed paper submissions \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Mendonca-Neto:2022:GSM, author = "Rayol Mendonca-Neto and Zhi Li and David Feny{\"o} and Claudio T. Silva and Fab{\'\i}ola G. Nakamura and Eduardo F. Nakamura", title = "A Gene Selection Method Based on Outliers for Breast Cancer Subtype Classification", journal = j-TCBB, volume = "19", number = "5", pages = "2547--2559", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3132339", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:01 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3132339", abstract = "Breast cancer is the second most common cancer type and is the leading cause of cancer-related deaths worldwide. Since it is a heterogeneous disease, subtyping breast cancer plays an important role in performing a specific treatment. Gene expression data \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Jin:2022:KEM, author = "Yuanyuan Jin and Wendi Ji and Wei Zhang and Xiangnan He and Xinyu Wang and Xiaoling Wang", title = "A {KG}-Enhanced Multi-Graph Neural Network for Attentive Herb Recommendation", journal = j-TCBB, volume = "19", number = "5", pages = "2560--2571", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3115489", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:01 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3115489", abstract = "Traditional Chinese Medicine (TCM) has the longest clinical history in Asia and contributes a lot to health maintenance worldwide. An essential step during the TCM diagnostic process is syndrome induction, which comprehensively analyzes the symptoms and \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Martins:2022:LPM, author = "Andreia S. Martins and Marta Gromicho and Susana Pinto and Mamede de Carvalho and Sara C. 
Madeira", title = "Learning Prognostic Models Using Disease Progression Patterns: Predicting the Need for Non-Invasive Ventilation in Amyotrophic Lateral Sclerosis", journal = j-TCBB, volume = "19", number = "5", pages = "2572--2583", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3078362", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:01 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3078362", abstract = "Amyotrophic Lateral Sclerosis is a devastating neurodegenerative disease causing rapid degeneration of motor neurons and usually leading to death by respiratory failure. Since there is no cure, treatment's goal is to improve symptoms and prolong \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Chen:2022:LBT, author = "Qingyu Chen and Jingcheng Du and Alexis Allot and Zhiyong Lu", title = "{LitMC-BERT}: Transformer-Based Multi-Label Classification of Biomedical Literature With An Application on {COVID-19} Literature Curation", journal = j-TCBB, volume = "19", number = "5", pages = "2584--2595", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3173562", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:01 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3173562", abstract = "The rapid growth of biomedical literature poses a significant challenge for curation and interpretation. This has become more evident during the COVID-19 pandemic. 
LitCovid, a literature database of COVID-19 related papers in PubMed, has accumulated over \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yella:2022:MDD, author = "Jaswanth K. Yella and Anil G. Jegga", title = "{MGATRx}: Discovering Drug Repositioning Candidates Using Multi-View Graph Attention", journal = j-TCBB, volume = "19", number = "5", pages = "2596--2604", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3082466", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:01 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3082466", abstract = "In-silico drug repositioning or predicting new indications for approved or late-stage clinical trial drugs is a resourceful and time-efficient strategy in drug discovery. However, inferring novel candidate drugs for a disease is challenging, given the \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhou:2022:SDC, author = "Jieli Zhou and Baoyu Jing and Zeya Wang and Hongyi Xin and Hanghang Tong", title = "{SODA}: Detecting {COVID-19} in Chest {X}-Rays With Semi-Supervised Open Set Domain Adaptation", journal = j-TCBB, volume = "19", number = "5", pages = "2605--2612", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3066331", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:01 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3066331", abstract = "Due to the shortage of COVID-19 viral testing kits, radiology imaging is used to complement the screening process. Deep learning based methods are promising in automatically detecting COVID-19 disease in chest x-ray images. Most of these works first train \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Guo:2022:CMC, author = "Xiaojuan Guo and Kewei Chen and Yinghua Chen and Chengjie Xiong and Yi Su and Li Yao and Eric M. 
Reiman", title = "A Computational {Monte Carlo} Simulation Strategy to Determine the Temporal Ordering of Abnormal Age Onset Among Biomarkers of {Alzheimer's} Disease", journal = j-TCBB, volume = "19", number = "5", pages = "2613--2622", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3106939", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:01 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3106939", abstract = "To quantitatively determining the temporal ordering of abnormal age onsets (AAO) among various biomarkers for Alzheimer's disease (AD), we introduced a computational Monte-Carlo simulation (CMCS) to statistically examine such ordering of an AAO \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Xiong:2022:MFI, author = "Zhankun Xiong and Feng Huang and Ziyan Wang and Shichao Liu and Wen Zhang", title = "A Multimodal Framework for Improving {{\em in Silico\/}} Drug Repositioning With the Prior Knowledge From Knowledge Graphs", journal = j-TCBB, volume = "19", number = "5", pages = "2623--2631", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3103595", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:01 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3103595", abstract = "Drug repositioning/repurposing is a very important approach towards identifying novel treatments for diseases in drug discovery. 
Recently, large-scale biological datasets are increasingly available for pharmaceutical research and promote the development \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ning:2022:NMI, author = "Qiao Ning and Xiaowei Zhao and Zhiqiang Ma", title = "A Novel Method for Identification of Glutarylation Sites Combining Borderline-{SMOTE} With {Tomek} Links Technique in Imbalanced Data", journal = j-TCBB, volume = "19", number = "5", pages = "2632--2641", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3095482", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:01 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3095482", abstract = "Glutarylation is a type of post-translational modification that occurs on lysine residues. It plays an irreplaceable role in various cellular functions. Therefore, identification of glutarylation sites is significant for understanding the molecular \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{LeMay:2022:PTA, author = "Matthew LeMay and Ran Libeskind-Hadas and Yi-Chieh Wu", title = "A Polynomial-Time Algorithm for Minimizing the Deep Coalescence Cost for Level-1 Species Networks", journal = j-TCBB, volume = "19", number = "5", pages = "2642--2653", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3105922", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:01 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3105922", abstract = "Phylogenetic analyses commonly assume that the species history can be represented as a tree. However, in the presence of hybridization, the species history is more accurately captured as a network. Despite several advances in modeling phylogenetic \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yang:2022:AED, author = "Xuan Yang and Chen Yang and Jimeng Lei and Jianxiao Liu", title = "An Approach of Epistasis Detection Using Integer Linear Programming Optimizing {Bayesian} Network", journal = j-TCBB, volume = "19", number = "5", pages = "2654--2671", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3092719", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:01 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3092719", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Li:2022:ESI, author = "Chao Li and Jun Sun and Li-Wei Li and Xiaojun Wu and Vasile Palade", title = "An Effective Swarm Intelligence Optimization Algorithm for Flexible Ligand Docking", journal = j-TCBB, volume = "19", number = "5", pages = "2672--2684", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3103777", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:01 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3103777", abstract = "In general, flexible ligand docking is used for docking simulations under the premise that the position of the binding site is already known, and meanwhile it can also be used without prior knowledge of the binding site. However, most of the optimization \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ranjan:2022:ETI, author = "Ashish Ranjan and David Fern{\'a}ndez-Baca and Sudhakar Tripathi and Akshay Deepak", title = "An Ensemble {Tf-Idf} Based Approach to Protein Function Prediction via Sequence Segmentation", journal = j-TCBB, volume = "19", number = "5", pages = "2685--2696", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3093060", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:01 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3093060", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Gudur:2022:FBE, author = "Venkateshwarlu Yellaswamy Gudur and Sidharth Maheshwari and Amit Acharyya and Rishad Shafik", title = "An {FPGA} Based Energy-Efficient Read Mapper With Parallel Filtering and In-Situ Verification", journal = j-TCBB, volume = "19", number = "5", pages = "2697--2711", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3106311", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:01 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/string-matching.bib; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3106311", abstract = "In the assembly pipeline of Whole Genome Sequencing (WGS), read mapping is a widely used method to re-assemble the genome. It employs approximate string matching and dynamic programming-based algorithms on a large volume of data and associated structures, \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Qin:2022:ADP, author = "Xinyi Qin and Lu Zhang and Min Liu and Ziwei Xu and Guangzhong Liu", title = "{ASFold-DNN}: Protein Fold Recognition Based on Evolutionary Features With Variable Parameters Using Full Connected Neural Network", journal = j-TCBB, volume = "19", number = "5", pages = "2712--2722", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3089168", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:01 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3089168", abstract = "Protein fold recognition contribute to comprehend the function of proteins, which is of great help to the gene therapy of diseases and the development of new drugs. Researchers have been working in this direction and have made considerable achievements, \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Cheng:2022:ADC, author = "Jianhong Cheng and Wei Zhao and Jin Liu and Xingzhi Xie and Shangjie Wu and Liangliang Liu and Hailin Yue and Junjian Li and Jianxin Wang and Jun Liu", title = "Automated Diagnosis of {COVID-19} Using Deep Supervised Autoencoder With Multi-View Features From {CT} Images", journal = j-TCBB, volume = "19", number = "5", pages = "2723--2736", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3102584", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:01 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3102584", abstract = "Accurate and rapid diagnosis of coronavirus disease 2019 (COVID-19) from chest CT scans is of great importance and urgency during the worldwide outbreak. However, radiologists have to distinguish COVID-19 pneumonia from other pneumonia in a large number \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Huang:2022:CCA, author = "Zhihao Huang and Yan Wang and Xiaoke Ma", title = "Clustering of Cancer Attributed Networks by Dynamically and Jointly Factorizing Multi-Layer Graphs", journal = j-TCBB, volume = "19", number = "5", pages = "2737--2748", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3090586", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:01 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3090586", abstract = "The accumulated omic data provides an opportunity to exploit the mechanisms of cancers and poses a challenge for their integrative analysis. Although extensive efforts have been devoted to address this issue, the current algorithms result in undesirable \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Arif:2022:DDL, author = "Muhammad Arif and Muhammad Kabir and Saeed Ahmed and Abid Khan and Fang Ge and Adel Khelifi and Dong-Jun Yu", title = "{DeepCPPred}: a Deep Learning Framework for the Discrimination of Cell-Penetrating Peptides and Their Uptake Efficiencies", journal = j-TCBB, volume = "19", number = "5", pages = "2749--2759", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3102133", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:01 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3102133", abstract = "Cell-penetrating peptides (CPPs) are special peptides capable of carrying a variety of bioactive molecules, such as genetic materials, short interfering RNAs and nanoparticles, into cells. Recently, research on CPP has gained substantial interest from \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Pu:2022:DDT, author = "Yuqian Pu and Jiawei Li and Jijun Tang and Fei Guo", title = "{DeepFusionDTA}: Drug-Target Binding Affinity Prediction With Information Fusion and Hybrid Deep-Learning Ensemble Model", journal = j-TCBB, volume = "19", number = "5", pages = "2760--2769", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3103966", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:01 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3103966", abstract = "Identification of drug-target interaction (DTI) is the most important issue in the broad field of drug discovery. Using purely biological experiments to verify drug-target binding profiles takes lots of time and effort, so computational technologies for \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Dutta:2022:DDA, author = "Pratik Dutta and Aditya Prakash Patra and Sriparna Saha", title = "{DeePROG}: Deep Attention-Based Model for Diseased Gene Prognosis by Fusing Multi-Omics Data", journal = j-TCBB, volume = "19", number = "5", pages = "2770--2781", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3090302", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:01 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3090302", abstract = "An in-depth exploration of gene prognosis using different methodologies aids in understanding various biological regulations of genes in disease pathobiology and molecular functions. Interpreting gene functions at biological and molecular levels remains a \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhao:2022:DLC, author = "Weizhong Zhao and Jinyong Zhang and Jincai Yang and Xingpeng Jiang and Tingting He", title = "Document-Level Chemical-Induced Disease Relation Extraction via Hierarchical Representation Learning", journal = j-TCBB, volume = "19", number = "5", pages = "2782--2793", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3086090", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:01 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3086090", abstract = "Over the past decades, Chemical-induced Disease (CID) relations have attracted extensive attention in biomedical community, reflecting wide applications in biomedical research and healthcare field. However, prior efforts fail to make full use of the \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ajmal:2022:DBN, author = "Hamda B. Ajmal and Michael G. Madden", title = "Dynamic {Bayesian} Network Learning to Infer Sparse Models From Time Series Gene Expression Data", journal = j-TCBB, volume = "19", number = "5", pages = "2794--2805", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3092879", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:01 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3092879", abstract = "One of the key challenges in systems biology is to derive gene regulatory networks (GRNs) from complex high-dimensional sparse data. 
Bayesian networks (BNs) and dynamic Bayesian networks (DBNs) have been widely applied to infer GRNs from gene expression \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Liu:2022:EIB, author = "Jian Liu and Jialiang Sun and Yongzhuang Liu", title = "Effective Identification of Bacterial Genomes From Short and Long Read Sequencing Data", journal = j-TCBB, volume = "19", number = "5", pages = "2806--2816", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3095164", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:01 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3095164", abstract = "With the development of sequencing technology, microbiological genome sequencing analysis has attracted extensive attention. For inexperienced users without sufficient bioinformatics skills, making sense of sequencing data for microbial identification, \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ghosh:2022:ERF, author = "Debopriya Ghosh and Javier Cabrera", title = "Enriched Random Forest for High Dimensional Genomic Data", journal = j-TCBB, volume = "19", number = "5", pages = "2817--2828", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3089417", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:01 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3089417", abstract = "Ensemble methods such as random forest works well on high-dimensional datasets. However, when the number of features is extremely large compared to the number of samples and the percentage of truly informative feature is very small, performance of \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wang:2022:GDC, author = "Pei Wang and Daojie Wang", title = "Gene Differential Co-Expression Networks Based on {RNA-Seq}: Construction and Its Applications", journal = j-TCBB, volume = "19", number = "5", pages = "2829--2841", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3103280", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:01 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3103280", abstract = "Gene co-expression network (GCN) becomes an increasingly important tool in omics data analysis. A great challenge for GCN construction is that the sample size is far lower than the number of genes. Traditional methods rely on considerable samples. 
\ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Naik:2022:IER, author = "Musab Naik and Luis Rueda and Akram Vasighizaker", title = "Identification of Enriched Regions in {ChIP-Seq} Data via a Linear-Time Multi-Level Thresholding Algorithm", journal = j-TCBB, volume = "19", number = "5", pages = "2842--2850", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3104734", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:01 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3104734", abstract = "Chromatin immunoprecipitation (ChIP--Seq) has emerged as a superior alternative to microarray technology as it provides higher resolution, less noise, greater coverage and wider dynamic range. While ChIP-Seq enables probing of DNA-protein \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol.
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ko:2022:IFM, author = "Young-Joon Ko and Sangsoo Kim and Cheol-Ho Pan and Keunwan Park", title = "Identification of Functional Microbial Modules Through Network-Based Analysis of Meta-Microbial Features Using Matrix Factorization", journal = j-TCBB, volume = "19", number = "5", pages = "2851--2862", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3100893", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:01 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3100893", abstract = "As the microbiome is composed of a variety of microbial interactions, it is imperative in microbiome research to identify a microbial sub-community that collectively conducts a specific function. However, current methodologies have been highly limited to \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Song:2022:ICP, author = "Junrong Song and Wei Peng and Feng Wang", title = "Identifying Cancer Patient Subgroups by Finding Co-Modules From the Driver Mutation Profiles and Downstream Gene Expression Profiles", journal = j-TCBB, volume = "19", number = "5", pages = "2863--2872", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3106344", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:01 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3106344", abstract = "Nowadays, the heterogeneous characteristics of cancer patients throw a big challenge to precision medicine and targeted therapy. Identifying cancer subtypes shed new light on effective personalized cancer medicine, future therapeutic strategies and \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhao:2022:ILE, author = "Siyuan Zhao and Jun Meng and Qiang Kang and Yushi Luan", title = "Identifying {LncRNA-Encoded} Short Peptides Using Optimized Hybrid Features and Ensemble Learning", journal = j-TCBB, volume = "19", number = "5", pages = "2873--2881", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3104288", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:01 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3104288", abstract = "Long non-coding RNA (lncRNA) contains short open reading frames (sORFs), and sORFs-encoded short peptides (SEPs) have become the focus of scientific studies due to their crucial role in life activities. The identification of SEPs is vital to further \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yan:2022:GESb, author = "Da Yan and Zhaohui S. Qin and Debswapna Bhattacharya and Jake Y. Chen", title = "Guest Editorial for Selected Papers From {BIOKDD 2021}", journal = j-TCBB, volume = "19", number = "6", pages = "3068--3069", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3208759", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:03 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3208759", abstract = "The papers in this special section were presented at the 20th International Workshop on Data Mining in Bioinformatics (BIOKDD 2021) that was held virtually on August 15, 2021. 
The conference featured the special theme of ``Artificial Intelligence'' in \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ye:2022:KGE, author = "Cheng Ye and Rowan Swiers and Stephen Bonner and Ian Barrett", title = "A Knowledge Graph-Enhanced Tensor Factorisation Model for Discovering Drug Targets", journal = j-TCBB, volume = "19", number = "6", pages = "3070--3080", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3197320", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:03 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3197320", abstract = "The drug discovery and development process is a long and expensive one, costing over 1 billion USD on average per drug and taking 10-15 years. To reduce the high levels of attrition throughout the process, there has been a growing interest in applying \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Dong:2022:MBM, author = "Ngan Dong and Stefanie M{\"u}cke and Megha Khosla", title = "{MuCoMiD}: a Multitask Graph Convolutional Learning Framework for {MiRNA}-Disease Association Prediction", journal = j-TCBB, volume = "19", number = "6", pages = "3081--3092", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3176456", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:03 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3176456", abstract = "Growing evidence from recent studies implies that microRNAs or miRNAs could serve as biomarkers in various complex human diseases. Since wet-lab experiments for detecting miRNAs associated with a disease are expensive and time-consuming, machine learning \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Aoki:2022:HMT, author = "Raquel Aoki and Frederick Tung and Gabriel L. Oliveira", title = "Heterogeneous Multi-Task Learning With Expert Diversity", journal = j-TCBB, volume = "19", number = "6", pages = "3093--3102", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3175456", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:03 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3175456", abstract = "Predicting multiple heterogeneous biological and medical targets is a challenge for traditional deep learning models. 
In contrast to single-task learning, in which a separate model is trained for each target, multi-task learning (MTL) optimizes a single \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Sefer:2022:BSI, author = "Emre Sefer", title = "{BioCode}: a Data-Driven Procedure to Learn the Growth of Biological Networks", journal = j-TCBB, volume = "19", number = "6", pages = "3103--3113", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3165092", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:03 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3165092", abstract = "Probabilistic biological network growth models have been utilized for many tasks including but not limited to capturing mechanism and dynamics of biological growth activities, null model representation, capturing anomalies, etc. Well-known examples of \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Mukherjee:2022:FOR, author = "Kingshuk Mukherjee and Daniel Dole-Muinos and Massimiliano Rossi and Ayomide Ajayi and Mattia Prosperi and Christina Boucher", title = "Finding Overlapping {Rmaps} via Clustering", journal = j-TCBB, volume = "19", number = "6", pages = "3114--3123", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3132534", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:03 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3132534", abstract = "Optical mapping is a method for creating high resolution restriction maps of an entire genome. Optical mapping has been largely automated, and first produces single molecule restriction maps, called Rmaps, which are assembled to generate genome wide \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Huang:2022:GES, author = "De-Shuang Huang and Kyungsook Han and Tatsuya Akutsu", title = "Guest Editorial for Special Section on the {16th International Conference on Intelligent Computing (ICIC)}", journal = j-TCBB, volume = "19", number = "6", pages = "3124--3125", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3150232", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:03 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3150232", abstract = "The eight papers in this special section were presented at the Sixteenth International Conference on Intelligent Computing (ICIC) that was held in Bari, Italy, on October 2-5, 2020. ICIC was formed to provide an annual forum dedicated to the emerging and \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Jiang:2022:GPC, author = "Tengsheng Jiang and Yuhui Chen and Shixuan Guan and Zhongtian Hu and Weizhong Lu and Qiming Fu and Yijie Ding and Haiou Li and Hongjie Wu", title = "{G} Protein-Coupled Receptor Interaction Prediction Based on Deep Transfer Learning", journal = j-TCBB, volume = "19", number = "6", pages = "3126--3134", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3128172", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:03 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3128172", abstract = "G protein-coupled receptors (GPCRs) account for about 40\% to 50\% of drug targets.
Many human diseases are related to G protein coupled receptors. Accurate prediction of GPCR interaction is not only essential to understand its structural role, \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Chou:2022:NAI, author = "Hsin-Hung Chou and Ching-Tien Hsu and Chin-Wei Hsu and Kai-Hsun Yao and Hao-Ching Wang and Sun-Yuan Hsieh", title = "Novel Algorithm for Improved Protein Classification Using Graph Similarity", journal = j-TCBB, volume = "19", number = "6", pages = "3135--3143", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3125836", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:03 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3125836", abstract = "Considerable sequence data are produced in genome annotation projects that relate to molecular levels, structural similarities, and molecular and biological functions. In structural genomics, the most essential task involves resolving protein structures \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol.
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2022:PVD, author = "Qinhu Zhang and Yindong Zhang and Siguo Wang and Zhan-Heng Chen and Valeriya Gribova and Vladimir Fedorovich Filaretov and De-Shuang Huang", title = "Predicting In-Vitro {DNA}--Protein Binding With a Spatially Aligned Fusion of Sequence and Shape", journal = j-TCBB, volume = "19", number = "6", pages = "3144--3153", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3133869", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:03 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3133869", abstract = "Discovery of transcription factor binding sites (TFBSs) is of primary importance for understanding the underlying binding mechanic and gene regulation process. Growing evidence indicates that apart from the primary DNA sequences, DNA shape landscape has a \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Fang:2022:DES, author = "Min Fang and Yufeng He and Zhihua Du and Vladimir N. 
Uversky", title = "{DeepCLD}: an Efficient Sequence-Based Predictor of Intrinsically Disordered Proteins", journal = j-TCBB, volume = "19", number = "6", pages = "3154--3159", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3124273", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:03 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3124273", abstract = "Intrinsic disorder is common in proteins, plays important roles in protein functionality, and is commonly associated with various human diseases. To have an accurate tool for the annotation of intrinsic disorder in proteins, this paper proposes a novel \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Lu:2022:PMD, author = "Xinguo Lu and Jinxin Li and Zhenghao Zhu and Yue Yuan and Guanyuan Chen and Keren He", title = "Predicting {miRNA}--Disease Associations via Combining Probability Matrix Feature Decomposition With Neighbor Learning", journal = j-TCBB, volume = "19", number = "6", pages = "3160--3170", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3097037", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:03 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3097037", abstract = "Predicting the associations of miRNAs and diseases may uncover the causation of various diseases. Many methods are emerging to tackle the sparse and unbalanced disease related miRNA prediction. Here, we propose a Probabilistic matrix decomposition \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wu:2022:ETM, author = "Qing-Wen Wu and Rui-Fen Cao and Jun-Feng Xia and Jian-Cheng Ni and Chun-Hou Zheng and Yan-Sen Su", title = "Extra Trees Method for Predicting {LncRNA}--Disease Association Based On Multi-Layer Graph Embedding Aggregation", journal = j-TCBB, volume = "19", number = "6", pages = "3171--3178", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3113122", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:03 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3113122", abstract = "Lots of experimental studies have revealed the significant associations between lncRNAs and diseases. Identifying accurate associations will provide a new perspective for disease therapy. Calculation-based methods have been developed to solve these \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Li:2022:PCL, author = "Bo Li and Yihui Tian and Yang Tian and Shihua Zhang and Xiaolong Zhang", title = "Predicting Cancer Lymph-Node Metastasis From {LncRNA} Expression Profiles Using Local Linear Reconstruction Guided Distance Metric Learning", journal = j-TCBB, volume = "19", number = "6", pages = "3179--3189", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3149791", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:03 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3149791", abstract = "Lymph-node metastasis is the most perilous cancer progressive state, where long non-coding RNA (lncRNA) has been confirmed to be an important genetic indicator in cancer prediction. However, lncRNA expression profile is often characterized of large \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Li:2022:ILM, author = "Wen Li and Shulin Wang and Junlin Xu and Ju Xiang", title = "Inferring Latent {MicroRNA}--Disease Associations on a Gene-Mediated Tripartite Heterogeneous Multiplexing Network", journal = j-TCBB, volume = "19", number = "6", pages = "3190--3201", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3143770", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:03 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3143770", abstract = "MicroRNA (miRNA) is a class of non-coding single-stranded RNA molecules encoded by endogenous genes with a length of about 22 nucleotides. MiRNAs have been successfully identified as differentially expressed in various cancers. There is evidence that \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Xie:2022:DLP, author = "Yulian Xie and Min Liu and Shirui Zhou and Yaonan Wang", title = "A Deep Local Patch Matching Network for Cell Tracking in Microscopy Image Sequences Without Registration", journal = j-TCBB, volume = "19", number = "6", pages = "3202--3212", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3113129", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:03 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3113129", abstract = "Cell tracking is critical for the modeling of plant cell growth patterns. 
A local graph matching algorithm is proposed to track cells by exploiting the tight spatial topology of cells. However, the local graph matching approach lacks robustness in the \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhanpeng:2022:MCM, author = "Huang Zhanpeng and Wu Jiekang", title = "A Multiview Clustering Method With Low-Rank and Sparsity Constraints for Cancer Subtyping", journal = j-TCBB, volume = "19", number = "6", pages = "3213--3223", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3122917", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:03 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3122917", abstract = "Multiomics data clustering is one of the major challenges in the field of precision medicine. Integration of multiomics data for cancer subtyping can improve the understanding on cancer and reveal systems-level insights. How to integrate multiomics data \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Aleb:2022:MAM, author = "Nassima Aleb", title = "A Mutual Attention Model for Drug Target Binding Affinity Prediction", journal = j-TCBB, volume = "19", number = "6", pages = "3224--3232", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3121275", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:03 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3121275", abstract = "Various machine learning approaches have been developed for drug-target interaction (DTI) prediction. One class of these approaches, DTBA, is interested in Drug-Target Binding Affinity strength, rather than focusing merely on the presence or absence of \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhu:2022:NMI, author = "Jianshen Zhu and Naveed Ahmed Azam and Fan Zhang and Aleksandar Shurbevski and Kazuya Haraguchi and Liang Zhao and Hiroshi Nagamochi and Tatsuya Akutsu", title = "A Novel Method for Inferring Chemical Compounds With Prescribed Topological Substructures Based on Integer Programming", journal = j-TCBB, volume = "19", number = "6", pages = "3233--3245", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3112598", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:03 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3112598", abstract = "Drug discovery is one of the major goals of computational biology and bioinformatics. 
A novel framework has recently been proposed for the design of chemical graphs using both artificial neural networks (ANNs) and mixed integer linear programming (MILP). \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yang:2022:RGI, author = "Yueran Yang and Yu Zhang and Shuai Li and Xubin Zheng and Man-Hon Wong and Kwong-Sak Leung and Lixin Cheng", title = "A Robust and Generalizable Immune-Related Signature for Sepsis Diagnostics", journal = j-TCBB, volume = "19", number = "6", pages = "3246--3254", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3107874", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:03 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3107874", abstract = "High-throughput sequencing can detect tens of thousands of genes in parallel, providing opportunities for improving the diagnostic accuracy of multiple diseases including sepsis, which is an aggressive inflammatory response to infection that can cause \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ni:2022:AES, author = "Xinzhe Ni and Bohao Geng and Haoyu Zheng and Jiawei Shi and Gang Hu and Jianzhao Gao", title = "Accurate Estimation of Single-Cell Differentiation Potency Based on Network Topology and Gene Ontology Information", journal = j-TCBB, volume = "19", number = "6", pages = "3255--3262", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3112951", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:03 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3112951", abstract = "One important task in single-cell analysis is to quantify the differentiation potential of single cells. Though various single-cell potency measures have been proposed, they are based on individual biological sources, thus not robust and reliable. It is \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Li:2022:APH, author = "Yiming Li and Min Zeng and Yifan Wu and Yaohang Li and Min Li", title = "Accurate Prediction of Human Essential Proteins Using Ensemble Deep Learning", journal = j-TCBB, volume = "19", number = "6", pages = "3263--3271", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3122294", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:03 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3122294", abstract = "Essential proteins are considered the foundation of life as they are indispensable for the survival of living organisms. 
Computational methods for essential protein discovery provide a fast way to identify essential proteins. But most of them heavily rely \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Bi:2022:ABB, author = "Jingshu Bi and Yuanjie Zheng and Chongjing Wang and Yanhui Ding", title = "An Attention Based Bidirectional {LSTM} Method to Predict the Binding of {TCR} and Epitope", journal = j-TCBB, volume = "19", number = "6", pages = "3272--3280", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3115353", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:03 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3115353", abstract = "The T-cell epitope prediction has always been a long-term challenge in immunoinformatics and bioinformatics. Studying the specific recognition between T-cell receptor (TCR) and peptide-major histocompatibility complex (p-MHC) complexes can help us better \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Mazrouee:2022:AAR, author = "Sepideh Mazrouee", title = "{ARHap}: Association Rule Haplotype Phasing", journal = j-TCBB, volume = "19", number = "6", pages = "3281--3294", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3119955", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:03 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3119955", abstract = "This article proposes a novel approach for Individual Human phasing through discovery of interesting hidden relations among single variant sites. The proposed framework, called ARHap, learns strong association rules among variant loci on the genome and \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ma:2022:BBR, author = "Jiani Ma and Lin Zhang and Shaojie Li and Hui Liu", title = "{BRPCA}: Bounded Robust Principal Component Analysis to Incorporate Similarity Network for {N7}-Methylguanosine({m$^7$G}) Site-Disease Association Prediction", journal = j-TCBB, volume = "19", number = "6", pages = "3295--3306", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3109055", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:03 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3109055", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Parvini:2022:CPD, author = "Ghazaleh Parvini and Katherine Braught and David Fern{\'a}ndez-Baca", title = "Checking Phylogenetics Decisiveness in Theory and in Practice", journal = j-TCBB, volume = "19", number = "6", pages = "3307--3316", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3128381", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:03 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3128381", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Battistella:2022:CCO, author = "Enzo Battistella and Maria Vakalopoulou and Roger Sun and Th{\'e}o Estienne and Marvin Lerousseau and Sergey Nikolaev and {\'E}milie Alvarez Andres and Alexandre Carr{\'e} and St{\'e}phane Niyoteka and Charlotte Robert and Nikos Paragios and {\'E}ric Deutsch", title = "{COMBING}: Clustering in Oncology for Mathematical and Biological Identification of Novel Gene Signatures", journal = j-TCBB, volume = "19", number = "6", pages = "3317--3331", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3123910", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:03 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3123910", abstract = "Precision medicine is a paradigm shift in healthcare relying heavily on genomics data. 
However, the complexity of biological interactions, the large number of genes as well as the lack of comparisons on the analysis of data, remain a tremendous bottleneck \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Mongia:2022:CPD, author = "Aanchal Mongia and Emilie Chouzenoux and Angshul Majumdar", title = "Computational Prediction of Drug-Disease Association Based on Graph-Regularized One Bit Matrix Completion", journal = j-TCBB, volume = "19", number = "6", pages = "3332--3339", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3189879", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:03 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3189879", abstract = "Investigation of existing drugs is an effective alternative to the discovery of new drugs for treating diseases. This task of drug re-positioning can be assisted by various kinds of computational methods to predict the best indication for a drug given the \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Shetta:2022:CMV, author = "Omar Shetta and Mahesan Niranjan and Srinandan Dasmahapatra", title = "Convex Multi-View Clustering Via Robust Low Rank Approximation With Application to Multi-Omic Data", journal = j-TCBB, volume = "19", number = "6", pages = "3340--3352", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3122961", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:03 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3122961", abstract = "Recent advances in high throughput technologies have made large amounts of biomedical omics data accessible to the scientific community. Single omic data clustering has proved its impact in the biomedical and biological research fields. Multi-omic data \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Aakesson:2022:CNN, author = "Mattias {\AA}kesson and Prashant Singh and Fredrik Wrede and Andreas Hellander", title = "Convolutional Neural Networks as Summary Statistics for Approximate {Bayesian} Computation", journal = j-TCBB, volume = "19", number = "6", pages = "3353--3365", year = "2022", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3108695", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Wed Oct 18 13:01:03 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3108695", abstract = "Approximate Bayesian Computation is widely used in systems biology for inferring parameters in stochastic gene regulatory network models. 
Its performance hinges critically on the ability to summarize high-dimensional system responses such as time series \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Liu:2023:CEM, author = "Qiaoming Liu and Xudong Zhao and Guohua Wang", title = "A Clustering Ensemble Method for Cell Type Detection by Multiobjective Particle Optimization", journal = j-TCBB, volume = "20", number = "1", pages = "1--14", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3132400", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:42 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3132400", abstract = "Single-cell RNA sequencing (scRNA-seq) is a new technology different from previous sequencing methods that measure the average expression level for each gene across a large population of cells. Thus, new computational methods are required to reveal cell \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Liu:2023:CTA, author = "Kun Liu and Hong-Dong Li and Yaohang Li and Jun Wang and Jianxin Wang", title = "A Comparison of Topologically Associating Domain Callers Based on {Hi-C} Data", journal = j-TCBB, volume = "20", number = "1", pages = "15--29", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3147805", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:42 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3147805", abstract = "Topologically associating domains (TADs) are local chromatin interaction domains, which have been shown to play an important role in gene expression regulation. TADs were originally discovered in the investigation of 3D genome organization based on High- \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Klosa:2023:FSG, author = "Jan Klosa and Noah Simon and Volkmar Liebscher and D{\"o}rte Wittenburg", title = "A Fitted Sparse-Group Lasso for Genome-Based Evaluations", journal = j-TCBB, volume = "20", number = "1", pages = "30--38", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3156805", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:42 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3156805", abstract = "In life sciences, high-throughput techniques typically lead to high-dimensional data and often the number of covariates is much larger than the number of observations. 
This inherently comes with multicollinearity challenging a statistical analysis in a \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yang:2023:LRD, author = "Chengzhuan Yang and Lincong Fang and Qian Yu and Hui Wei", title = "A Learning Robust and Discriminative Shape Descriptor for Plant Species Identification", journal = j-TCBB, volume = "20", number = "1", pages = "39--51", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3148463", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:42 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3148463", abstract = "Plant identification based on leaf images is a widely concerned application field in artificial intelligence and botany. The key problem is extracting robust discriminative features from leaf images and assigning a measure of similarity. This study \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhou:2023:MBD, author = "Feng Zhou and Meng-Meng Yin and Jing-Xiu Zhao and Junliang Shang and Jin-Xing Liu", title = "A Method Based On Dual-Network Information Fusion to Predict {MiRNA}--Disease Associations", journal = j-TCBB, volume = "20", number = "1", pages = "52--60", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3133006", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:42 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3133006", abstract = "MicroRNAs (miRNAs) are single-stranded small RNAs. An increasing number of studies have shown that miRNAs play a vital role in many important biological processes. However, some experimental methods to predict unknown miRNA-disease associations (MDAs) are \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yu:2023:MPD, author = "Qiang Yu and Xiao Zhang and Yana Hu and Shengpin Chen and Liying Yang", title = "A Method for Predicting {DNA} Motif Length Based On Deep Learning", journal = j-TCBB, volume = "20", number = "1", pages = "61--73", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3158471", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:42 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3158471", abstract = "A DNA motif is a sequence pattern shared by the DNA sequence segments that bind to a specific protein. 
Discovering motifs in a given DNA sequence dataset plays a vital role in studying gene expression regulation. As an important attribute of the DNA motif, \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Huang:2023:RFM, author = "Weixian Huang and Kaiwen Tan and Ziye Zhang and Jinlong Hu and Shoubin Dong", title = "A Review of Fusion Methods for Omics and Imaging Data", journal = j-TCBB, volume = "20", number = "1", pages = "74--93", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3143900", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:42 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3143900", abstract = "The development of omics data and biomedical images has greatly advanced the progress of precision medicine in diagnosis, treatment, and prognosis. The fusion of omics and imaging data, i.e., omics-imaging fusion, offers a new strategy for understanding \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ranjan:2023:SSB, author = "Ashish Ranjan and Archana Tiwari and Akshay Deepak", title = "A Sub-Sequence Based Approach to Protein Function Prediction via Multi-Attention Based Multi-Aspect Network", journal = j-TCBB, volume = "20", number = "1", pages = "94--105", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3130923", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:42 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3130923", abstract = "Inferring the protein function(s) via the protein sub-sequence classification is often obstructed due to lack of knowledge about function(s) of sub-sequences in the protein sequence. In this regard, we develop a novel `multi-aspect' \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Bai:2023:AKD, author = "Jun Bai and Chuantao Yin and Jianfei Zhang and Yanmeng Wang and Yi Dong and Wenge Rong and Zhang Xiong", title = "Adversarial Knowledge Distillation Based Biomedical Factoid Question Answering", journal = j-TCBB, volume = "20", number = "1", pages = "106--118", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3161032", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:42 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3161032", abstract = "Biomedical factoid question answering is an essential application for biomedical information sharing. 
Recently, neural network based approaches have shown remarkable performance for this task. However, due to the scarcity of annotated data which requires \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Bohnsack:2023:AFS, author = "Katrin Sophie Bohnsack and Marika Kaden and Julia Abel and Thomas Villmann", title = "Alignment-Free Sequence Comparison: a Systematic Survey From a Machine Learning Perspective", journal = j-TCBB, volume = "20", number = "1", pages = "119--135", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3140873", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:42 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3140873", abstract = "The encounter of large amounts of biological sequence data generated during the last decades and the algorithmic and hardware improvements have offered the possibility to apply machine learning techniques in bioinformatics. While the machine learning \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Joseph:2023:ACM, author = "Steffy Maria Joseph and P. S. 
Sathidevi", title = "An Automated {cDNA} Microarray Image Analysis for the Determination of Gene Expression Ratios", journal = j-TCBB, volume = "20", number = "1", pages = "136--150", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3135650", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:42 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3135650", abstract = "This paper proposes a fully automated technique for cDNA microarray image analysis. Initially, an effective preprocessing stage combined with gridding is built to get the individual spot regions of images. Current work begins with the proposal of a new \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Iravani:2023:IDL, author = "Sahar Iravani and Tim O. F. Conrad", title = "An Interpretable Deep Learning Approach for Biomarker Detection in {LC-MS} Proteomics Data", journal = j-TCBB, volume = "20", number = "1", pages = "151--161", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3141656", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:42 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3141656", abstract = "Analyzing mass spectrometry-based proteomics data with deep learning (DL) approaches poses several challenges due to the high dimensionality, low sample size, and high level of noise. Additionally, DL-based workflows are often hindered to be integrated \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Jha:2023:AEM, author = "Kanchan Jha and Sriparna Saha", title = "Analyzing Effect of Multi-Modality in Predicting Protein-Protein Interactions", journal = j-TCBB, volume = "20", number = "1", pages = "162--173", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3157531", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:42 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3157531", abstract = "Nowadays, multiple sources of information about proteins are available such as protein sequences, 3D structures, Gene Ontology (GO), etc. Most of the works on protein-protein interaction (PPI) identification had utilized these information about proteins, \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ragi:2023:AID, author = "Shankarachary Ragi and Md Hafizur Rahman and Jamison Duckworth and Kalimuthu Jawaharraj and Parvathi Chundi and Venkataramana Gadhamshetty", title = "Artificial Intelligence-Driven Image Analysis of Bacterial Cells and Biofilms", journal = j-TCBB, volume = "20", number = "1", pages = "174--184", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3138304", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:42 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3138304", abstract = "The current study explores an artificial intelligence framework for measuring the structural features from microscopy images of the bacterial biofilms. 
\bioname{Desulfovibrio alaskensis} G20 (DA-G20) grown on mild steel surfaces is used as a model \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2023:CGI, author = "Hao Zhang and Chuanxu Yan and Yewei Xia and Jihong Guan and Shuigeng Zhou", title = "Causal Gene Identification Using Non-Linear Regression-Based Independence Tests", journal = j-TCBB, volume = "20", number = "1", pages = "185--195", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3149864", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:42 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3149864", abstract = "With the development of biomedical techniques in the past decades, causal gene identification has become one of the most promising applications in human genome-based business, which can help doctors to evaluate the risk of certain genetic diseases and \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Anjum:2023:CMA, author = "Naser Anjum and Raian Latif Nabil and Rakibul Islam Rafi and Md. Shamsuzzoha Bayzid and M.
Saifur Rahman", title = "{CD-MAWS}: an Alignment-Free Phylogeny Estimation Method Using Cosine Distance on Minimal Absent Word Sets", journal = j-TCBB, volume = "20", number = "1", pages = "196--205", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3136792", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:42 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3136792", abstract = "Multiple sequence alignment has been the traditional and well established approach of sequence analysis and comparison, though it is time and memory consuming. As the scale of sequencing data is increasing day by day, the importance of faster yet accurate \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Pham:2023:CBE, author = "Tuan D. Pham", title = "Classification of \bioname{Caenorhabditis elegans} Locomotion Behaviors With Eigenfeature-Enhanced Long Short-Term Memory Networks", journal = j-TCBB, volume = "20", number = "1", pages = "206--216", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3153668", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:42 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3153668", abstract = "The free-living nematode \bioname{Caenorhabditis elegans} is an ideal model for understanding behavior and networks of neurons. Experimental and quantitative analyses of neural circuits and behavior have led to system-level understanding of \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol.
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Manipur:2023:CDP, author = "Ichcha Manipur and Maurizio Giordano and Marina Piccirillo and Seetharaman Parashuraman and Lucia Maddalena", title = "Community Detection in Protein--Protein Interaction Networks and Applications", journal = j-TCBB, volume = "20", number = "1", pages = "217--237", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3138142", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:42 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3138142", abstract = "The ability to identify and characterize not only the protein-protein interactions but also their internal modular organization through network analysis is fundamental for understanding the mechanisms of biological processes at the molecular level. Indeed, \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Qureshi:2023:CMA, author = "Rizwan Qureshi and Bin Zou and Tanvir Alam and Jia Wu and Victor H. F.
Lee and Hong Yan", title = "Computational Methods for the Analysis and Prediction of {EGFR}-Mutated Lung Cancer Drug Resistance: Recent Advances in Drug Design, Challenges and Future Prospects", journal = j-TCBB, volume = "20", number = "1", pages = "238--255", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3141697", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:42 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3141697", abstract = "Lung cancer is a major cause of cancer deaths worldwide, and has a very low survival rate. Non-small cell lung cancer (NSCLC) is the largest subset of lung cancers, which accounts for about 85\% of all cases. It has been well established that a \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yang:2023:CPD, author = "Jingbo Yang and Denan Zhang and Yiyang Cai and Kexin Yu and Mingming Li and Lei Liu and Xiujie Chen", title = "Computational Prediction of Drug Phenotypic Effects Based on Substructure-Phenotype Associations", journal = j-TCBB, volume = "20", number = "1", pages = "256--265", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3155453", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:42 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3155453", abstract = "Identifying drug phenotypic effects, including therapeutic effects and adverse drug reactions (ADRs), is an inseparable part for evaluating the potentiality of new drug candidates (NDCs). 
However, current computational methods for predicting phenotypic \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Lee:2023:CCP, author = "Wook Lee and Seokwoo Lee and Kyungsook Han", title = "Constructing a Cancer Patient-Specific Network Based on Second-Order Partial Correlations of Gene Expression and {DNA} Methylation", journal = j-TCBB, volume = "20", number = "1", pages = "266--276", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3145796", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:42 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3145796", abstract = "Typically patient-specific gene networks are constructed with gene expression data only. Such networks cannot distinguish direct gene interactions from indirect interactions via others such as the effect of epigenetic events to gene activity. There is an \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Liu:2023:CNN, author = "Zhi-Hao Liu and Cun-Mei Ji and Jian-Cheng Ni and Yu-Tian Wang and Li-Juan Qiao and Chun-Hou Zheng", title = "Convolution Neural Networks Using Deep Matrix Factorization for Predicting {CircRNA}-Disease Association", journal = j-TCBB, volume = "20", number = "1", pages = "277--284", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3138339", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:42 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3138339", abstract = "CircRNAs have a stable structure, which gives them a higher tolerance to nucleases. Therefore, the properties of circular RNAs are beneficial in disease diagnosis. However, there are few known associations between circRNAs and disease. Biological \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Hua:2023:CER, author = "Yang Hua and Xiaoning Song and Zhenhua Feng and Xiao-Jun Wu and Josef Kittler and Dong-Jun Yu", title = "{CPInformer} for Efficient and Robust Compound-Protein Interaction Prediction", journal = j-TCBB, volume = "20", number = "1", pages = "285--296", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3144008", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:42 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3144008", abstract = "Recently, deep learning has become the mainstream methodology for Compound-Protein Interaction (CPI) prediction.
However, the existing compound-protein feature extraction methods have some issues that limit their performance. First, graph networks are \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Li:2023:CSS, author = "Kailong Li and Lijun Quan and Yelu Jiang and Yan Li and Yiting Zhou and Tingfang Wu and Qiang Lyu", title = "{ctP$^2$ISP}: Protein--Protein Interaction Sites Prediction Using Convolution and Transformer With Data Augmentation", journal = j-TCBB, volume = "20", number = "1", pages = "297--306", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3154413", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:42 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3154413", abstract = "Protein--protein interactions are the basis of many cellular biological processes, such as cellular organization, signal transduction, and immune response. Identifying protein--protein interaction sites is essential for understanding the \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Du:2023:DML, author = "Xiuquan Du and Jiajia Hu", title = "Deep Multi-Label Joint Learning for {RNA} and {DNA-Binding} Proteins Prediction", journal = j-TCBB, volume = "20", number = "1", pages = "307--320", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3150280", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:42 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3150280", abstract = "The recognition of DNA- (DBPs) and RNA-binding proteins (RBPs) is not only conducive to understanding cell function, but also a challenging task. Previous studies have shown that these proteins are usually considered separately due to different binding \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yang:2023:DTL, author = "Xiao-Hui Yang and Zi-Jun Xi and Jie-Ping Li and Xin-Lei Feng and Xiao-Hong Zhu and Si-Yi Guo and Chun-Peng Song", title = "Deep Transfer Learning-Based Multi-Object Detection for Plant Stomata Phenotypic Traits Intelligent Recognition", journal = j-TCBB, volume = "20", number = "1", pages = "321--329", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3137810", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:42 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3137810", abstract = "Plant stomata phenotypic traits can provide a basis for enhancing crop tolerance in adversity. 
Manually counting the number of stomata and measuring the height and width of stomata obviously cannot satisfy the high-throughput data. How to detect and \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Uner:2023:DDL, author = "Onur Can Uner and Halil Ibrahim Kuru and R. Gokberk Cinbis and Oznur Tastan and A. Ercument Cicek", title = "{DeepSide}: a Deep Learning Approach for Drug Side Effect Prediction", journal = j-TCBB, volume = "20", number = "1", pages = "330--339", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3141103", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:42 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3141103", abstract = "Drug failures due to unforeseen adverse effects at clinical trials pose health risks for the participants and lead to substantial financial losses. Side effect prediction algorithms have the potential to guide the drug design process. LINCS L1000 dataset \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Huang:2023:DGE, author = "Zimo Huang and Jun Wang and Zhongmin Yan and Lin Wan and Maozu Guo", title = "Differential Gene Expression Prediction by Ensemble Deep Networks on Histone Modification Data", journal = j-TCBB, volume = "20", number = "1", pages = "340--351", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3139634", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:42 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3139634", abstract = "Predicting differential gene expression (DGE) from Histone modifications (HM) signal is crucial to understand how HM controls cell functional heterogeneity through influencing differential gene regulation. Most existing prediction methods use fixed-length \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Lin:2023:ENN, author = "Jinghang Lin and Xiaoran Tong and Chenxi Li and Qing Lu", title = "Expectile Neural Networks for Genetic Data Analysis of Complex Diseases", journal = j-TCBB, volume = "20", number = "1", pages = "352--359", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3146795", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:42 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3146795", abstract = "The genetic etiologies of common diseases are highly complex and heterogeneous. 
Classic methods, such as linear regression, have successfully identified numerous variants associated with complex diseases. Nonetheless, for most diseases, the identified \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Williams:2023:FDS, author = "Lucia Williams and Alexandru I. Tomescu and Brendan Mumey", title = "Flow Decomposition With Subpath Constraints", journal = j-TCBB, volume = "20", number = "1", pages = "360--370", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3147697", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:42 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3147697", abstract = "Flow network decomposition is a natural model for problems where we are given a flow network arising from superimposing a set of weighted paths and would like to recover the underlying data, i.e., {\em decompose\/} the flow into the original \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Gonzalez-Crespo:2023:BMT, author = "Isabel Gonz{\'a}lez-Crespo and Antonio G{\'o}mez-Caama{\~n}o and {\'O}scar L{\'o}pez Pouso and John D.
Fenwick and Juan Pardo-Montero", title = "A Biomathematical Model of Tumor Response to Radioimmunotherapy With {$ \alpha $PDL1} and {$ \alpha $CTLA4}", journal = j-TCBB, volume = "20", number = "2", pages = "808--821", month = mar, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3174454", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3174454", abstract = "There is evidence of synergy between radiotherapy and immunotherapy. Radiotherapy can increase liberation of tumor antigens, causing activation of antitumor T-cells. This effect can be boosted with immunotherapy. Radioimmunotherapy has potential to \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Li:2023:CMU, author = "Xiong Li and Yangkai Lin and Chengwang Xie and Zejun Li and Min Chen and Peng Wang and Juan Zhou", title = "A Clustering Method Unifying Cell-Type Recognition and Subtype Identification for Tumor Heterogeneity Analysis", journal = j-TCBB, volume = "20", number = "2", pages = "822--832", month = mar, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3203185", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3203185", abstract = "The rapid development of single-cell technology has opened up a whole new perspective for identifying cell types in multicellular organisms and understanding the relationships between them. 
Distinguishing different cell types and subtypes can identify the \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Li:2023:DLF, author = "Min Li and Wenbo Shi and Fuhao Zhang and Min Zeng and Yaohang Li", title = "A Deep Learning Framework for Predicting Protein Functions With Co-Occurrence of {GO} Terms", journal = j-TCBB, volume = "20", number = "2", pages = "833--842", month = mar, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3170719", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3170719", abstract = "The understanding of protein functions is critical to many biological problems such as the development of new drugs and new crops. To reduce the huge gap between the increase of protein sequences and annotations of protein functions, many methods have \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Peng:2023:AMC, author = "Xiaoqing Peng and Wenjin Zhang and Wanxin Cui and Binrong Ding and Qingtong Lyu and Jianxin Wang", title = "{ADmeth}: a Manually Curated Database for the Differential Methylation in {Alzheimer}'s Disease", journal = j-TCBB, volume = "20", number = "2", pages = "843--851", month = mar, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3178087", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3178087", abstract = "Alzheimer's disease (AD) is the most common neurodegenerative disease. More and more evidence show that DNA methylation is closely related to the pathological mechanism of AD. Many AD-associated differentially methylated genes, regions and CpG \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhao:2023:ADT, author = "Qichang Zhao and Guihua Duan and Mengyun Yang and Zhongjian Cheng and Yaohang Li and Jianxin Wang", title = "{AttentionDTA}: Drug-Target Binding Affinity Prediction by Sequence-Based Deep Learning With Attention Mechanism", journal = j-TCBB, volume = "20", number = "2", pages = "852--863", month = mar, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3170365", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3170365", abstract = "The identification of drug--target relations (DTRs) is substantial in drug development. A large number of methods treat DTRs as drug-target interactions (DTIs), a binary classification problem. The main drawback of these methods are the lack of \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Si:2023:BAM, author = "Jiasheng Si and Liu Sun and Deyu Zhou and Jie Ren and Lin Li", title = "Biomedical Argument Mining Based on Sequential Multi-Task Learning", journal = j-TCBB, volume = "20", number = "2", pages = "864--874", month = mar, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3173447", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3173447", abstract = "Biomedical argument mining aims to automatically identify and extract the argumentative structure in biomedical text. 
It helps to determine not only what positions people adopt, but also why they hold such opinions, which provides valuable insights into \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ghasemi:2023:CDG, author = "Mahdieh Ghasemi and Maseud Rahgozar and Kaveh Kavousi", title = "Complex Disease Genes Identification Using a Heterogeneous Network Embedding Approach", journal = j-TCBB, volume = "20", number = "2", pages = "875--882", month = mar, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3175598", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3175598", abstract = "Finding the causal relation between a gene and a disease using experimental approaches is a time-consuming and expensive task. However, computational approaches are cost-efficient methods for identifying candidate genes. This article proposes a new \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Jee:2023:DNM, author = "Dong Jun Jee and Yixin Kong and Hyonho Chun", title = "Deep Nonnegative Matrix Factorization Using a Variational Autoencoder With Application to Single-Cell {RNA} Sequencing Data", journal = j-TCBB, volume = "20", number = "2", pages = "883--893", month = mar, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3172723", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3172723", abstract = "Single-cell RNA sequencing is used to analyze the gene expression data of individual cells, thereby adding to existing knowledge of biological phenomena. Accordingly, this technology is widely used in numerous biomedical studies. Recently, the variational \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Altuntas:2023:DAC, author = "Volkan Altuntas", title = "Diffusion Alignment Coefficient ({DAC}): a Novel Similarity Metric for Protein-Protein Interaction Network", journal = j-TCBB, volume = "20", number = "2", pages = "894--903", month = mar, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3185406", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3185406", abstract = "Interaction networks can be used to predict the functions of unknown proteins using known interactions and proteins with known functions. 
Many graph theory or diffusion-based methods have been proposed, using the assumption that the topological properties \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Rehman:2023:DMI, author = "Mobeen Ur Rehman and Hilal Tayara and Kil To Chong", title = "{DL-m6A}: Identification of {N6-Methyladenosine} Sites in Mammals Using Deep Learning Based on Different Encoding Schemes", journal = j-TCBB, volume = "20", number = "2", pages = "904--911", month = mar, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3192572", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3192572", abstract = "N6-methyladenosine (m6A) is a common post-transcriptional alteration that plays a critical function in a variety of biological processes. Although experimental approaches for identifying m6A sites have been developed and deployed, they are currently \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yu:2023:DPD, author = "Zhong-Ze Yu and Chun-Xiang Peng and Jun Liu and Biao Zhang and Xiao-Gen Zhou and Gui-Jun Zhang", title = "{DomBpred}: Protein Domain Boundary Prediction Based on Domain-Residue Clustering Using Inter-Residue Distance", journal = j-TCBB, volume = "20", number = "2", pages = "912--922", month = mar, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3175905", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3175905", abstract = "Domain boundary prediction is one of the most important problems in the study of protein structure and function, especially for large proteins. At present, most domain boundary prediction methods have low accuracy and limitations in dealing with multi-\ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Huang:2023:DNC, author = "Xin Huang and Benzhe Su and Chenbo Zhu and Xinyu He and Xiaohui Lin", title = "Dynamic Network Construction for Identifying Early Warning Signals Based On a Data-Driven Approach: Early Diagnosis Biomarker Discovery for Gastric Cancer", journal = j-TCBB, volume = "20", number = "2", pages = "923--931", month = mar, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3176319", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3176319", abstract = "During the development of complex diseases, there is a critical transition from one status to another at a tipping point, which can be an early indicator of disease deterioration. To effectively enhance the performance of early risk identification, a \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Lin:2023:EIC, author = "Xuan Lin and Zhe Quan and Zhi-Jie Wang and Yan Guo and Xiangxiang Zeng and Philip S. 
Yu", title = "Effectively Identifying Compound-Protein Interaction Using Graph Neural Representation", journal = j-TCBB, volume = "20", number = "2", pages = "932--943", month = mar, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3198003", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3198003", abstract = "Effectively identifying compound-protein interactions (CPIs) is crucial for new drug design, which is an important step in silico drug discovery. Current machine learning methods for CPI prediction mainly use one-dimensional (1D) compound/protein strings \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Fan:2023:ECL, author = "Yongxian Fan and Guicong Sun and Xiaoyong Pan", title = "{ELMo4m6A}: a Contextual Language Embedding-Based Predictor for Detecting {RNA} {N6}-Methyladenosine Sites", journal = j-TCBB, volume = "20", number = "2", pages = "944--954", month = mar, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3173323", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3173323", abstract = "N6-methyladenosine (m6A) is a universal post-transcriptional modification of RNAs, and it is widely involved in various biological processes. Identifying m6A modification sites accurately is indispensable to further investigate m6A-mediated biological \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Rashid:2023:ESP, author = "Shamima Rashid and Suresh Sundaram and Chee Keong Kwoh", title = "Empirical Study of Protein Feature Representation on Deep Belief Networks Trained With Small Data for Secondary Structure Prediction", journal = j-TCBB, volume = "20", number = "2", pages = "955--966", month = mar, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3168676", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3168676", abstract = "Protein secondary structure (SS) prediction is a classic problem of computational biology and is widely used in structural characterization and to infer homology. While most SS predictors have been trained on thousands of sequences, a previous approach \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wang:2023:EFI, author = "Chao Wang and Quan Zou and Ying Ju and Hua Shi", title = "{Enhancer-FRL}: Improved and Robust Identification of Enhancers and Their Activities Using Feature Representation Learning", journal = j-TCBB, volume = "20", number = "2", pages = "967--975", month = mar, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3204365", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3204365", abstract = "Enhancers are crucial for precise regulation of gene expression, while enhancer identification and strength prediction are challenging because of their free distribution and tremendous number of similar fractions in the genome. Although several \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Liu:2023:EDD, author = "Shichao Liu and Yang Zhang and Yuxin Cui and Yang Qiu and Yifan Deng and Zhongfei Zhang and Wen Zhang", title = "Enhancing Drug-Drug Interaction Prediction Using Deep Attention Neural Networks", journal = j-TCBB, volume = "20", number = "2", pages = "976--985", month = mar, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3172421", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3172421", abstract = "Drug-drug interactions are one of the main concerns in drug discovery. 
Accurate prediction of drug-drug interactions plays a key role in increasing the efficiency of drug research and safety when multiple drugs are co-prescribed. With various data sources \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Shen:2023:EED, author = "Yang Shen and Jinlin Zhu and Zhaohong Deng and Wenwei Lu and Hongchao Wang", title = "{EnsDeepDP}: an Ensemble Deep Learning Approach for Disease Prediction Through Metagenomics", journal = j-TCBB, volume = "20", number = "2", pages = "986--998", month = mar, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3201295", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3201295", abstract = "A growing number of studies show that the human microbiome plays a vital role in human health and can be a crucial factor in predicting certain human diseases. However, microbiome data are often characterized by the limited samples and high-dimensional \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{DiPersia:2023:EIP, author = "Leandro {Di Persia} and Tiago Lopez and Agustin Arce and Diego H. 
Milone and Georgina Stegmayer", title = "{exp2GO}: Improving Prediction of Functions in the Gene Ontology With Expression Data", journal = j-TCBB, volume = "20", number = "2", pages = "999--1008", month = mar, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3167245", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3167245", abstract = "The computational methods for the prediction of gene function annotations aim to automatically find associations between a gene and a set of Gene Ontology (GO) terms describing its functions. Since the hand-made curation process of novel annotations and \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Castiglione:2023:EDR, author = "Filippo Castiglione and Christine Nardini and Elia Onofri and Marco Pedicini and Paolo Tieri", title = "Explainable Drug Repurposing Approach From Biased Random Walks", journal = j-TCBB, volume = "20", number = "2", pages = "1009--1019", month = mar, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3191392", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3191392", abstract = "Drug repurposing is a highly active research area, aiming at finding novel uses for drugs that have been previously developed for other therapeutic purposes. Despite the flourishing of methodologies, success is still partial, and different approaches \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Nguyen:2023:EBB, author = "Tri Minh Nguyen and Thomas P. Quinn and Thin Nguyen and Truyen Tran", title = "Explaining Black Box Drug Target Prediction Through Model Agnostic Counterfactual Samples", journal = j-TCBB, volume = "20", number = "2", pages = "1020--1029", month = mar, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3190266", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3190266", abstract = "Many high-performance DTA deep learning models have been proposed, but they are mostly black-box and thus lack human interpretability. Explainable AI (XAI) can make DTA models more trustworthy, and allows to distill biological knowledge from the models. \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Tang:2023:FMP, author = "Chunyan Tang and Cheng Zhong and Mian Wang and Fengfeng Zhou", title = "{FMGNN}: a Method to Predict Compound-Protein Interaction With Pharmacophore Features and Physicochemical Properties of Amino Acids", journal = j-TCBB, volume = "20", number = "2", pages = "1030--1040", month = mar, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3172340", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3172340", abstract = "Identifying interactions between compounds and proteins is an essential task in drug discovery. 
To recommend compounds as new drug candidates, applying the computational approaches has a lower cost than conducting the wet-lab experiments. Machine learning-\ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Liu:2023:GMD, author = "Jiwen Liu and Zhufang Kuang and Lei Deng", title = "{GCNPCA}: {miRNA-Disease} Associations Prediction Algorithm Based on Graph Convolutional Neural Networks", journal = j-TCBB, volume = "20", number = "2", pages = "1041--1052", month = mar, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3203564", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3203564", abstract = "A growing number of studies have confirmed the important role of microRNAs (miRNAs) in human diseases and the aberrant expression of miRNAs affects the onset and progression of human diseases. The discovery of disease-associated miRNAs as new biomarkers \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Tian:2023:GGC, author = "Zhen Tian and Haichuan Fang and Zhixia Teng and Yangdong Ye", title = "{GOGCN}: Graph Convolutional Network on Gene Ontology for Functional Similarity Analysis of Genes", journal = j-TCBB, volume = "20", number = "2", pages = "1053--1064", month = mar, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3181300", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3181300", abstract = "The measurement of gene functional similarity plays a critical role in numerous biological applications, such as gene clustering, the construction of gene similarity networks. However, most existing approaches still rely heavily on traditional \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Chu:2023:GTD, author = "Thang Chu and Thuy Trang Nguyen and Bui Duong Hai and Quang Huy Nguyen and Tuan Nguyen", title = "Graph Transformer for Drug Response Prediction", journal = j-TCBB, volume = "20", number = "2", pages = "1065--1072", month = mar, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3206888", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3206888", abstract = "{\em Background\/}: Previous models have shown that learning drug features from their graph representation is more efficient than learning from their strings or numeric representations. Furthermore, integrating multi-omics data of cell lines \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Herrera-Romero:2023:GIT, author = "Bryan Herrera-Romero and Diego Almeida-Gal{\'a}rraga and Graciela M. 
Salum and Fernando Villalba-Meneses and Marco Esteban Gudi{\~n}o-Gomezjurado", title = "{GUSignal}: an Informatics Tool to Analyze Glucuronidase Gene Expression in \bioname{Arabidopsis thaliana} Roots", journal = j-TCBB, volume = "20", number = "2", pages = "1073--1080", month = mar, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3190427", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3190427", abstract = "The {\em uidA\/} gene codifies for a glucuronidase (GUS) enzyme which has been used as a biotechnological tool during the last years. When {\em uidA\/} gene is fused to a gene's promoter region, it is possible to evaluate the \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Sun:2023:HPS, author = "Junwei Sun and Haoping Ji and Yingcong Wang and Yanfeng Wang", title = "Hybrid Projective Synchronization via {PI} Controller Based on {DNA} Strand Displacement", journal = j-TCBB, volume = "20", number = "2", pages = "1081--1091", month = mar, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3190397", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3190397", abstract = "Classical three-variable chaotic system coupling synchronization has been implemented in previous work based on DNA strand displacement (DSD). 
Herein, by using DSD reactions as the foundation, a proportional integral (PI) controller for chaotic system is \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zheng:2023:ITU, author = "Vicky Zheng and Ahmet Erdem Sariyuce and Jaroslaw Zola", title = "Identifying Taxonomic Units in Metagenomic {DNA} Streams on Mobile Devices", journal = j-TCBB, volume = "20", number = "2", pages = "1092--1103", month = mar, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3172661", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3172661", abstract = "With the emergence of portable DNA sequencers, such as Oxford Nanopore Technology MinION, metagenomic DNA sequencing can be performed in real-time and directly in the field. However, because metagenomic DNA analysis tasks, e.g., classification, taxonomic \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Gormez:2023:IMD, author = "Yasin G{\"o}rmez and Zafer Aydin", title = "{IGPRED-MultiTask}: a Deep Learning Model to Predict Protein Secondary Structure, Torsion Angles and Solvent Accessibility", journal = j-TCBB, volume = "20", number = "2", pages = "1104--1113", month = mar, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3191395", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3191395", abstract = "Protein secondary structure, solvent accessibility and torsion angle predictions are preliminary steps to predict 3D structure of a protein. Deep learning approaches have achieved significant improvements in predicting various features of protein \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Du:2023:IBQ, author = "Yongping Du and Jingya Yan and Yuxuan Lu and Yiliang Zhao and Xingnan Jin", title = "Improving Biomedical Question Answering by Data Augmentation and Model Weighting", journal = j-TCBB, volume = "20", number = "2", pages = "1114--1124", month = mar, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3171388", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3171388", abstract = "Biomedical Question Answering aims to extract an answer to the given question from a biomedical context. 
Due to the strong professionalism of specific domain, it's more difficult to build large-scale datasets for specific domain question answering. \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Martin-Vide:2023:ACB, author = "Carlos Mart{\'\i}n-Vide and Miguel A. Vega-Rodr{\'\i}guez", title = "Algorithms for Computational Biology: Eighth Edition", journal = j-TCBB, volume = "20", number = "3", pages = "1626--1627", month = may, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3218808", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3218808", abstract = "This special section of {\em IEEE/ACM Transactions on Computational Biology and Bioinformatics\/} presents extended versions of some of the best papers accepted at the Eighth International Conference on Algorithms for Computational Biology, AlCoB \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Alexandrino:2023:RID, author = "Alexsandro Oliveira Alexandrino and Klairton Lima Brito and Andre Rodrigues Oliveira and Ulisses Dias and Zanoni Dias", title = "Reversal and Indel Distance With Intergenic Region Information", journal = j-TCBB, volume = "20", number = "3", pages = "1628--1640", month = may, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3215615", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3215615", abstract = "Recent works on genome rearrangements have shown that incorporating intergenic region information along with gene order in models provides better estimations for the rearrangement distance than using gene order alone. The reversal distance is one of the \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Brito:2023:GRD, author = "Klairton Lima Brito and Alexsandro Oliveira Alexandrino and Andre Rodrigues Oliveira and Ulisses Dias and Zanoni Dias", title = "Genome Rearrangement Distance With a Flexible Intergenic Regions Aspect", journal = j-TCBB, volume = "20", number = "3", pages = "1641--1653", month = may, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3165443", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3165443", abstract = "Most mathematical models for genome rearrangement problems have considered only gene order. In this way, the rearrangement distance considering some set of events, such as reversal and transposition events, is commonly defined as the minimum number of \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Landry:2023:DPN, author = "Kaari Landry and Aivee Teodocio and Manuel Lafond and Olivier Tremblay-Savard", title = "Defining Phylogenetic Network Distances Using Cherry Operations", journal = j-TCBB, volume = "20", number = "3", pages = "1654--1666", month = may, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3162991", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3162991", abstract = "In phylogenetic networks, picking a cherry consists of removing a leaf that shares a parent with another leaf, or removing a reticulate edge whose endpoints are parents of leaves. Cherry-picking operations were recently shown to have several structural \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Mou:2023:STD, author = "Chenqi Mou and Wenwen Ju", title = "Sparse Triangular Decomposition for Computing Equilibria of Biological Dynamic Systems Based on Chordal Graphs", journal = j-TCBB, volume = "20", number = "3", pages = "1667--1678", month = may, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3156759", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3156759", abstract = "Many biological systems are modeled mathematically as dynamic systems in the form of polynomial or rational differential equations. 
In this paper we apply sparse triangular decomposition to compute the equilibria of biological dynamic systems by \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Schaller:2023:BMG, author = "David Schaller and Manuela Gei{\ss} and Marc Hellmuth and Peter F. Stadler", title = "Best Match Graphs With Binary Trees", journal = j-TCBB, volume = "20", number = "3", pages = "1679--1690", month = may, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3143870", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3143870", abstract = "Best match graphs (BMG) are a key intermediate in graph-based orthology detection and contain a large amount of information on the gene tree. We provide a near-cubic algorithm to determine whether a BMG is binary-explainable, i.e., whether it can be \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yao:2023:IDV, author = "Yin Yao and Martin C. 
Frith", title = "Improved {DNA}-Versus-Protein Homology Search for Protein Fossils", journal = j-TCBB, volume = "20", number = "3", pages = "1691--1699", month = may, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3177855", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3177855", abstract = "Protein fossils, i.e., noncoding DNA descended from coding DNA, arise frequently from transposable elements (TEs), decayed genes, and viral integrations. They can reveal, and mislead about, evolutionary history and relationships. They have been detected \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zaharias:2023:LSM, author = "Paul Zaharias and Vladimir Smirnov and Tandy Warnow", title = "Large-Scale Multiple Sequence Alignment and the Maximum Weight Trace Alignment Merging Problem", journal = j-TCBB, volume = "20", number = "3", pages = "1700--1712", month = may, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3191848", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3191848", abstract = "MAGUS is a recent multiple sequence alignment method that provides excellent accuracy on large challenging datasets. MAGUS uses divide-and-conquer: it divides the sequences into disjoint sets, computes alignments on the disjoint sets, and then merges the \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wu:2023:PCS, author = "Kaitao Wu and Lexiang Wang and Bo Liu and Yang Liu and Yadong Wang and Junyi Li", title = "{PSPGO}: Cross-Species Heterogeneous Network Propagation for Protein Function Prediction", journal = j-TCBB, volume = "20", number = "3", pages = "1713--1724", month = may, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3215257", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3215257", abstract = "How to use computational methods to effectively predict the function of proteins remains a challenge. Most prediction methods based on single species or single data source have some limitations: the former need to train different models for different \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Villaverde:2023:APU, author = "Alejandro F. Villaverde and Elba Raim{\'u}ndez and Jan Hasenauer and Julio R. Banga", title = "Assessment of Prediction Uncertainty Quantification Methods in Systems Biology", journal = j-TCBB, volume = "20", number = "3", pages = "1725--1736", month = may, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3213914", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3213914", abstract = "Biological processes are often modelled using ordinary differential equations. 
The unknown parameters of these models are estimated by optimizing the fit of model simulation and experimental data. The resulting parameter estimates inevitably possess some \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Song:2023:IPL, author = "Jinmiao Song and Shengwei Tian and Long Yu and Qimeng Yang and Yuanxu Wang and Qiguo Dai and Xiaodong Duan", title = "{ISLMI}: Predicting {lncRNA--miRNA} Interactions Based on Information Injection and Second-Order Graph Convolution Network", journal = j-TCBB, volume = "20", number = "3", pages = "1737--1745", month = may, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3215151", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3215151", abstract = "Studies have shown that lncRNA-miRNA interactions can affect cellular expression at the level of gene molecules through a variety of regulatory mechanisms and have important effects on the biological activities of living organisms. Several biomolecular \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Kang:2023:HTS, author = "Yan Kang and Haining Wang and Bin Pu and Liu Tao and Jianguo Chen and Philip S.
Yu", title = "A Hybrid Two-Stage Teaching-Learning-Based Optimization Algorithm for Feature Selection in Bioinformatics", journal = j-TCBB, volume = "20", number = "3", pages = "1746--1760", month = may, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3215129", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3215129", abstract = "The `curse of dimensionality' brings new challenges to the feature selection (FS) problem, especially in bioinformatics field. In this paper, we propose a hybrid Two-Stage Teaching-Learning-Based Optimization (TS-TLBO) algorithm to improve \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Das:2023:BBR, author = "Pranab Das and Yogita Thakran and S. R. Ngamwal Anal and Vipin Pal and Anju Yadav", title = "{BRMCF}: Binary Relevance and {MLSMOTE} Based Computational Framework to Predict Drug Functions From Chemical and Biological Properties of Drugs", journal = j-TCBB, volume = "20", number = "3", pages = "1761--1773", month = may, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3215645", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3215645", abstract = "In silico machine learning based prediction of drug functions considering the drug properties would substantially enhance the speed and reduce the cost of identifying promising drug leads. The drug function prediction capability of different drug \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans.
Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Xi:2023:LPL, author = "Wen-Yu Xi and Feng Zhou and Ying-Lian Gao and Jin-Xing Liu and Chun-Hou Zheng", title = "{LDCMFC}: Predicting Long Non-Coding {RNA} and Disease Association Using Collaborative Matrix Factorization Based on Correntropy", journal = j-TCBB, volume = "20", number = "3", pages = "1774--1782", month = may, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3215194", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3215194", abstract = "With the development of bioinformatics, the important role played by lncRNAs in various intractable diseases has aroused the interest of many experts. In recent studies, researchers have found that several human diseases are related to lncRNAs. Moreover, \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol.
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Bhattacharyya:2023:BAD, author = "Ramkishore Bhattacharyya", title = "Bidirectional Association Discovery Leads to Precise Identification of Lung Cancer Biomarkers and Genome Taxa Class", journal = j-TCBB, volume = "20", number = "3", pages = "1783--1794", month = may, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3215630", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3215630", abstract = "Identifying proximity between pairs of expression vectors is one of the fundamental requirements in machine learning and data mining algorithms. We propose a new metric, Bidirectional Association Similarity ({\em BiAS\/}), to measure the degree \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol.
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Tchendji:2023:PTS, author = "Vianney Kengne Tchendji and Franklin Ingrid Kamga Youmbi and Cl{\'e}mentin Tayou Djamegni and Jerry Lacmou Zeutouo", title = "A Parallel Tiled and Sparsified Four-{Russians} Algorithm for {Nussinov}'s {RNA} Folding", journal = j-TCBB, volume = "20", number = "3", pages = "1795--1806", month = may, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3216826", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3216826", abstract = "To enable extensive research on the ribonucleic acid (RNA) molecule, predicting its spatial structure stands as a much-valued research field. In this regard, Nussinov and Jacobson published the (now) {\em de facto\/} solution to predict the \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol.
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Xu:2023:IGR, author = "Jie Xu and Guanxue Yang and Guohai Liu and Hui Liu", title = "Inferring Gene Regulatory Networks via Ensemble Path Consistency Algorithm Based on Conditional Mutual Information", journal = j-TCBB, volume = "20", number = "3", pages = "1807--1816", month = may, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3220581", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3220581", abstract = "Utilizing gene expression data to infer gene regulatory networks has received great attention because gene regulation networks can reveal complex life phenomena by studying the interaction mechanism among nodes. However, the reconstruction of large-scale \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Das:2023:EDC, author = "Pradeep Kumar Das and Biswajeet Sahoo and Sukadev Meher", title = "An Efficient Detection and Classification of Acute Leukemia Using Transfer Learning and Orthogonal Softmax Layer-Based Model", journal = j-TCBB, volume = "20", number = "3", pages = "1817--1828", month = may, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3218590", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3218590", abstract = "For the early diagnosis of hematological disorders like blood cancer, microscopic analysis of blood cells is very important. Traditional deep CNNs lead to overfitting when it receives small medical image datasets such as ALLIDB1, ALLIDB2, and ASH. This \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Maia:2023:IEC, author = "Marcelo Rodrigues de Holanda Maia and Alexandre Plastino and Alex Freitas and Jo{\~a}o Pedro de Magalh{\~a}es", title = "Interpretable Ensembles of Classifiers for Uncertain Data With Bioinformatics Applications", journal = j-TCBB, volume = "20", number = "3", pages = "1829--1841", month = may, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3218588", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3218588", abstract = "Data uncertainty remains a challenging issue in many applications, but few classification algorithms can effectively cope with it. An ensemble approach for uncertain categorical features has recently been proposed, achieving promising results. It consists \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Chen:2023:FIC, author = "Shutao Chen and Lin Zhang and Xiangzhi Chen and Hui Liu", title = "{FGFICA}: Independent Component Analysis of Fusion Genomic Features for Mining Epi-Transcriptome Profiling Data", journal = j-TCBB, volume = "20", number = "3", pages = "1842--1853", month = may, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3220552", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3220552", abstract = "Existing studies indicate that in-depth studies of the {$N^6$}-methyladenosine (m$^6$A) co-methylation patterns in epi-transcriptome profiling data may contribute to understanding its complex regulatory mechanisms. In order \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol.
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yu:2023:DDG, author = "Hui Yu and KangKang Li and JianYu Shi", title = "{DGANDDI}: Double Generative Adversarial Networks for Drug-Drug Interaction Prediction", journal = j-TCBB, volume = "20", number = "3", pages = "1854--1863", month = may, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3219883", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3219883", abstract = "Co-administration of multiple drugs may cause adverse drug interactions and side effects that damage the body. Therefore, accurate prediction of drug-drug interaction (DDI) events is of great importance. Recently, many computational methods have been \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Bai:2023:IBR, author = "Jun Bai and Chuantao Yin and Zimeng Wu and Jianfei Zhang and Yanmeng Wang and Guanyi Jia and Wenge Rong and Zhang Xiong", title = "Improving Biomedical {ReQA} With Consistent {NLI}-Transfer and Post-Whitening", journal = j-TCBB, volume = "20", number = "3", pages = "1864--1875", month = may, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3219375", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3219375", abstract = "Retrieval Question Answering (ReQA) is an essential mechanism of information sharing which aims to find the answer to a posed question from large-scale candidates. Currently, the most efficient solution is Dual-Encoder which has shown great potential in \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Lin:2023:ASD, author = "Xiangbin Lin and Weizhuang Kong and Jianxiu Li and Xuexiao Shao and Changting Jiang and Ruilan Yu and Xiaowei Li and Bin Hu", title = "Aberrant Static and Dynamic Functional Brain Network in Depression Based on {EEG} Source Localization", journal = j-TCBB, volume = "20", number = "3", pages = "1876--1889", month = may, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3222592", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3222592", abstract = "Objective. 
Depression is accompanied by abnormalities in large-scale functional brain networks. This paper combined static and dynamic methods to analyze the abnormal topology and changes of functional connectivity network (FCN) of depression. Methods. We \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Tripathi:2023:AGC, author = "Prasun Chandra Tripathi and Soumen Bag", title = "An Attention-Guided {CNN} Framework for Segmentation and Grading of Glioma Using {$3$D} {MRI} Scans", journal = j-TCBB, volume = "20", number = "3", pages = "1890--1904", month = may, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3220902", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3220902", abstract = "Glioma has emerged as the deadliest form of brain tumor for human beings. Timely diagnosis of these tumors is a major step towards effective oncological treatment. Magnetic Resonance Imaging (MRI) typically offers a non-invasive inspection of brain \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Liu:2023:GID, author = "Shuhui Liu and Yupei Zhang and Xuequn Shang", title = "{GLassonet}: Identifying Discriminative Gene Sets Among Molecular Subtypes of Breast Cancer", journal = j-TCBB, volume = "20", number = "3", pages = "1905--1916", month = may, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3220623", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3220623", abstract = "Breast cancer is a heterogeneous disease caused by various alterations in the genome or transcriptome. Molecular subtypes of breast cancer have been reported, but useful biomarkers remain to be identified to uncover underlying biological mechanisms and \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Chen:2023:DCP, author = "Hegang Chen and Yuyin Lu and Yuedong Yang and Yanghui Rao", title = "A Drug Combination Prediction Framework Based on Graph Convolutional Network and Heterogeneous Information", journal = j-TCBB, volume = "20", number = "3", pages = "1917--1925", month = may, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3224734", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3224734", abstract = "Combination therapy, which can improve therapeutic efficacy and reduce side effects, plays an important role in the treatment of complex diseases. 
Yet, a large number of possible combinations among candidate compounds limits our ability to identify \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2023:PRD, author = "Shengli Zhang and Yuanyuan Jing", title = "{PreVFs-RG}: a Deep Hybrid Model for Identifying Virulence Factors Based on Residual Block and Gated Recurrent Unit", journal = j-TCBB, volume = "20", number = "3", pages = "1926--1934", month = may, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3223038", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3223038", abstract = "Many infectious diseases are caused by bacterial pathogens. The pathogenic mechanisms of bacterial pathogens are complex and it is usually caused by virulence factors (VFs) in many cases. Whether VFs exist is the main difference between the genomes of \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhao:2023:CPC, author = "Minghua Zhao and Min Yuan and Yaning Yang and Steven X. 
Xu", title = "{CPGL}: Prediction of Compound-Protein Interaction by Integrating Graph Attention Network With Long Short-Term Memory Neural Network", journal = j-TCBB, volume = "20", number = "3", pages = "1935--1942", month = may, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3225296", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3225296", abstract = "Recent advancements of artificial intelligence based on deep learning algorithms have made it possible to computationally predict compound-protein interaction (CPI) without conducting laboratory experiments. In this manuscript, we integrated a graph \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhao:2023:GPD, author = "Qichang Zhao and Guihua Duan and Haochen Zhao and Kai Zheng and Yaohang Li and Jianxin Wang", title = "{GIFDTI}: Prediction of Drug-Target Interactions Based on Global Molecular and Intermolecular Interaction Representation Learning", journal = j-TCBB, volume = "20", number = "3", pages = "1943--1952", month = may, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3225423", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3225423", abstract = "Drug discovery and drug repurposing often rely on the successful prediction of drug-target interactions (DTIs). Recent advances have shown great promise in applying deep learning to drug-target interaction prediction. 
One challenge in building deep \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Dang:2023:ICD, author = "Qi Dang and Yong Liang and Dong Ouyang and Rui Miao and Caijin Ling and Xiaoying Liu and Shengli Xie", title = "Improved Computational Drug-Repositioning by Self-Paced Non-Negative Matrix Tri-Factorization", journal = j-TCBB, volume = "20", number = "3", pages = "1953--1962", month = may, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3225300", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3225300", abstract = "Drug repositioning (DR) is a strategy to find new targets for existing drugs, which plays an important role in reducing the costs, time, and risk of traditional drug development. Recently, the matrix factorization approach has been widely used in the \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Gao:2023:GES, author = "Honghao Gao and Zijian Zhang and Ram{\'o}n J. 
Dur{\'a}n Barroso", title = "Guest Editorial Special Issue on Multi-Modal Biomedical Computing-Deep Transfer Learning", journal = j-TCBB, volume = "20", number = "4", pages = "2363--2366", month = jul, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3284603", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:48 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3284603", abstract = "In recent years, the development of biomedical imaging techniques, integrative sensors, and artificial intelligence has brought many benefits to the protection of health. We can collect, measure, and analyze vast volumes of health-related data using the \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wang:2023:CML, author = "Wei Wang and Xinhua Yu and Bo Fang and Yue Zhao and Yongyong Chen and Wei Wei and Junxin Chen", title = "Cross-Modality {LGE-CMR} Segmentation Using Image-to-Image Translation Based Data Augmentation", journal = j-TCBB, volume = "20", number = "4", pages = "2367--2375", month = jul, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3140306", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:48 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3140306", abstract = "Accurate segmentation of ventricle and myocardium from the late gadolinium enhancement (LGE) cardiac magnetic resonance (CMR) is an important tool for myocardial infarction (MI) analysis. However, the complex enhancement pattern of LGE-CMR and the lack of \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol.
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ni:2023:MMS, author = "Zhichen Ni and Honglong Chen and Zhe Li and Xiaomeng Wang and Na Yan and Weifeng Liu and Feng Xia", title = "{MSCET}: a Multi-Scenario Offloading Schedule for Biomedical Data Processing and Analysis in Cloud-Edge-Terminal Collaborative Vehicular Networks", journal = j-TCBB, volume = "20", number = "4", pages = "2376--2386", month = jul, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3131177", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:48 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3131177", abstract = "With the rapid development of Artificial Intelligence (AI) and Internet of Things (IoTs), an increasing number of computation intensive or delay sensitive biomedical data processing and analysis tasks are produced in vehicles, bringing more and more \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2023:MMF, author = "Yuanpeng Zhang and Kaijian Xia and Yizhang Jiang and Pengjiang Qian and Weiwei Cai and Chengyu Qiu and Khin Wee Lai and Dongrui Wu", title = "Multi-Modality Fusion \& Inductive Knowledge Transfer Underlying Non-Sparse Multi-Kernel Learning and Distribution Adaption", journal = j-TCBB, volume = "20", number = "4", pages = "2387--2397", month = jul, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3142748", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:48 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3142748", abstract = "With the development of sensors, more and more multimodal data are accumulated, especially in biomedical and bioinformatics fields. Therefore, multimodal data analysis becomes very important and urgent. In this study, we combine multi-kernel learning and \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Chen:2023:EDM, author = "Nan Chen and Man Guo and Yongchao Li and Xiping Hu and Zhijun Yao and Bin Hu", title = "Estimation of Discriminative Multimodal Brain Network Connectivity Using Message-Passing-Based Nonlinear Network Fusion", journal = j-TCBB, volume = "20", number = "4", pages = "2398--2406", month = jul, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3137498", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:48 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3137498", abstract = "Effective estimation of brain network connectivity enables better unraveling of the extraordinary complexity interactions of brain regions and helps in auxiliary diagnosis of psychiatric disorders. Considering different modalities can provide \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wang:2023:DTL, author = "Jinxia Wang and Liang Qiao and Haibin Lv and Zhihan Lv", title = "Deep Transfer Learning-Based Multi-Modal Digital Twins for Enhancement and Diagnostic Analysis of Brain {MRI} Image", journal = j-TCBB, volume = "20", number = "4", pages = "2407--2419", month = jul, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3168189", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:48 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3168189", abstract = "Objective: it aims to adopt deep transfer learning combined with Digital Twins (DTs) in Magnetic Resonance Imaging (MRI) medical image enhancement. Methods: MRI image enhancement method based on metamaterial composite technology is proposed by analyzing \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Dharejo:2023:MBM, author = "Fayaz Ali Dharejo and Muhammad Zawish and Farah Deeba and Yuanchun Zhou and Kapal Dev and Sunder Ali Khowaja and Nawab Muhammad Faseeh Qureshi", title = "Multimodal-Boost: Multimodal Medical Image Super-Resolution Using Multi-Attention Network With Wavelet Transform", journal = j-TCBB, volume = "20", number = "4", pages = "2420--2433", month = jul, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3191387", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:48 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3191387", abstract = "Multimodal medical images are widely used by clinicians and physicians to analyze and retrieve complementary information from high-resolution images in a non-invasive manner. Loss of corresponding image resolution adversely affects the overall performance \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Bian:2023:IEA, author = "Yuexin Bian and Jintai Chen and Xiaojun Chen and Xiaoxian Yang and Danny Z. 
Chen and Jian Wu", title = "Identifying Electrocardiogram Abnormalities Using a Handcrafted-Rule-Enhanced Neural Network", journal = j-TCBB, volume = "20", number = "4", pages = "2434--2444", month = jul, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3140785", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:48 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3140785", abstract = "A large number of people suffer from life-threatening cardiac abnormalities, and electrocardiogram (ECG) analysis is beneficial to determining whether an individual is at risk of such abnormalities. Automatic ECG classification methods, especially the \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ahmed:2023:APN, author = "Imran Ahmed and Abdellah Chehri and Gwanggil Jeon and Francesco Piccialli", title = "Automated Pulmonary Nodule Classification and Detection Using Deep Learning Architectures", journal = j-TCBB, volume = "20", number = "4", pages = "2445--2456", month = jul, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3192139", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:48 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3192139", abstract = "Recent advancement in biomedical imaging technologies has contributed to tremendous opportunities for the health care sector and the biomedical community. However, collecting, measuring, and analyzing large volumes of health-related data like images is a \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Chen:2023:SSM, author = "Qi-Qi Chen and Zhao-Hui Sun and Chuan-Feng Wei and Edmond Q. Wu and Dong Ming", title = "Semi-Supervised {$3$D} Medical Image Segmentation Based on Dual-Task Consistent Joint Learning and Task-Level Regularization", journal = j-TCBB, volume = "20", number = "4", pages = "2457--2467", month = jul, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3144428", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:48 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3144428", abstract = "Semi-supervised learning has attracted wide attention from many researchers since its ability to utilize a few data with labels and relatively more data without labels to learn information. Some existing semi-supervised methods for medical image \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Adhikari:2023:DTL, author = "Mainak Adhikari and Abhishek Hazra and Sudarshan Nandy", title = "Deep Transfer Learning for Communicable Disease Detection and Recommendation in Edge Networks", journal = j-TCBB, volume = "20", number = "4", pages = "2468--2479", month = jul, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3180393", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:48 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3180393", abstract = "Considering the increasing number of communicable disease cases such as COVID-19 worldwide, the early detection of the disease can prevent and limit the outbreak. Besides that, the PCR test kits are not available in most parts of the world, and there is \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Xu:2023:HMD, author = "Xiaolong Xu and Haoyan Xu and Liying Wang and Yuanyuan Zhang and Fu Xiao", title = "{Hygeia}: a Multilabel Deep Learning-Based Classification Method for Imbalanced Electrocardiogram Data", journal = j-TCBB, volume = "20", number = "4", pages = "2480--2493", month = jul, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3176905", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:48 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3176905", abstract = "Electrocardiogram (ECG) is a common diagnostic indicator of heart disease in hospitals. 
Because of the low price and noninvasiveness of ECG diagnosis, it is widely used for prescreening and physical examination of heart diseases. In several studies on ECG \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ma:2023:PDA, author = "Yuxin Ma and Shuo Wang and Yang Hua and Ruhui Ma and Tao Song and Zhengui Xue and Heng Cao and Haibing Guan", title = "Perceptual Data Augmentation for Biomedical Coronary Vessel Segmentation", journal = j-TCBB, volume = "20", number = "4", pages = "2494--2505", month = jul, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3188148", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:48 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3188148", abstract = "Sufficient annotated data is critical to the success of deep learning methods. Annotating for vessel segmentation in X-ray coronary angiograms is extremely difficult because of the small and complex structures to be processed. Although unsupervised domain \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Li:2023:HDT, author = "Jianyuan Li and Xiong Luo and Huimin Ma and Wenbing Zhao", title = "A Hybrid Deep Transfer Learning Model With Kernel Metric for {COVID-19} Pneumonia Classification Using Chest {CT} Images", journal = j-TCBB, volume = "20", number = "4", pages = "2506--2517", month = jul, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3216661", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:48 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3216661", abstract = "Coronavirus disease-2019 (COVID-19) as a new pneumonia which is extremely infectious, the classification of this coronavirus is essential to effectively control the development of the epidemic. Pathological changes in the chest computed tomography (CT) \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Siniosoglou:2023:PPF, author = "Ilias Siniosoglou and Vasileios Argyriou and Panagiotis Sarigiannidis and Thomas Lagkas and Antonios Sarigiannidis and Sotirios K. 
Goudos and Shaohua Wan", title = "Post-Processing Fairness Evaluation of Federated Models: an Unsupervised Approach in Healthcare", journal = j-TCBB, volume = "20", number = "4", pages = "2518--2529", month = jul, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3269767", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:48 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3269767", abstract = "Modern Healthcare cyberphysical systems have begun to rely more and more on distributed AI leveraging the power of Federated Learning (FL). Its ability to train Machine Learning (ML) and Deep Learning (DL) models for the wide variety of medical fields, \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Cai:2023:DDP, author = "Wentian Cai and Linsen Xie and Weixian Yang and Yijiang Li and Ying Gao and Tingting Wang", title = "{DFTNet}: Dual-Path Feature Transfer Network for Weakly Supervised Medical Image Segmentation", journal = j-TCBB, volume = "20", number = "4", pages = "2530--2540", month = jul, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3198284", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:48 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3198284", abstract = "Medical image segmentation has long suffered from the problem of expensive labels. Acquiring pixel-level annotations is time-consuming, labor-intensive, and relies on extensive expert knowledge. Bounding box annotations, in contrast, are relatively easy \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wang:2023:CAB, author = "Qingbin Wang and Kaiyi Chen and Wanrong Dou and Yutao Ma", title = "Cross-Attention Based Multi-Resolution Feature Fusion Model for Self-Supervised Cervical {OCT} Image Classification", journal = j-TCBB, volume = "20", number = "4", pages = "2541--2554", month = jul, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3246979", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:48 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3246979", abstract = "Cervical cancer seriously endangers the health of the female reproductive system and even risks women's life in severe cases. Optical coherence tomography (OCT) is a non-invasive, real-time, high-resolution imaging technology for cervical tissues. \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yan:2023:DTL, author = "Ke Yan and Xinlu Guo and Zhiwei Ji and Xiaokang Zhou", title = "Deep Transfer Learning for Cross-Species Plant Disease Diagnosis Adapting Mixed Subdomains", journal = j-TCBB, volume = "20", number = "4", pages = "2555--2564", month = jul, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3135882", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:48 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3135882", abstract = "A deep transfer learning framework adapting mixed subdomains is proposed for cross-species plant disease diagnosis. 
Most existing deep transfer learning studies focus on knowledge transfer between highly correlated domains. These methods may fail to deal \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ding:2023:RMI, author = "Yi Ding and Xue Qin and Mingfeng Zhang and Ji Geng and Dajiang Chen and Fuhu Deng and Chunhe Song", title = "{RLSegNet}: an Medical Image Segmentation Network Based on Reinforcement Learning", journal = j-TCBB, volume = "20", number = "4", pages = "2565--2576", month = jul, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3195705", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:48 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3195705", abstract = "In the area of medical image segmentation, the spatial information can be further used to enhance the image segmentation performance. And the 3D convolution is mainly used to better utilize the spatial information. However, how to better utilize the \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Liang:2023:TTE, author = "Tingting Liang and Congying Xia and Ziqiang Zhao and Yixuan Jiang and Yuyu Yin and Philip S. 
Yu", title = "Transferring From Textual Entailment to Biomedical Named Entity Recognition", journal = j-TCBB, volume = "20", number = "4", pages = "2577--2586", month = jul, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3236477", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:48 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3236477", abstract = "Biomedical Named Entity Recognition (BioNER) aims at identifying biomedical entities such as genes, proteins, diseases, and chemical compounds in the given textual data. However, due to the issues of ethics, privacy, and high specialization of biomedical \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Qayyum:2023:HDE, author = "Abdul Qayyum and Imran Razzak and M. Tanveer and Moona Mazher and Bandar Alhaqbani", title = "High-Density Electroencephalography and Speech Signal Based Deep Framework for Clinical Depression Diagnosis", journal = j-TCBB, volume = "20", number = "4", pages = "2587--2597", month = jul, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3257175", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:48 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3257175", abstract = "Depression is a mental disorder characterized by persistent depressed mood or loss of interest in performing activities, causing significant impairment in daily routine. Possible causes include psychological, biological, and social sources of distress. \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ren:2023:MIS, author = "Sheng Ren and Kehua Guo and Xiaokang Zhou and Bin Hu and Feihong Zhu and Entao Luo", title = "Medical Image Super-Resolution Based on Semantic Perception Transfer Learning", journal = j-TCBB, volume = "20", number = "4", pages = "2598--2609", month = jul, year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3212343", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:48 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3212343", abstract = "Medical images are an important basis for doctors to diagnose diseases, but some medical images have low resolution due to hardware technology and cost constraints. Super-resolution technology can reconstruct low-resolution medical images into high-resolution \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wang:2023:CNN, author = "Mei-Neng Wang and Xue-Jun Xie and Zhu-Hong You and Leon Wong and Li-Ping Li and Zhan-Heng Chen", title = "Combining {$K$} Nearest Neighbor With Nonnegative Matrix Factorization for Predicting {CircRNA}-Disease Associations", journal = j-TCBB, volume = "20", number = "5", pages = "2610--2618", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3180903", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3180903", abstract = "Accumulating evidences show that circular RNAs (circRNAs) play an important role in regulating gene expression, and involve in many complex human diseases. Identifying associations of circRNA with disease helps to understand the pathogenesis, treatment \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Guan:2023:PDB, author = "Shixuan Guan and Quan Zou and Hongjie Wu and Yijie Ding", title = "{Protein-DNA} Binding Residues Prediction Using a Deep Learning Model With Hierarchical Feature Extraction", journal = j-TCBB, volume = "20", number = "5", pages = "2619--2628", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3190933", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3190933", abstract = "Biologically important effects occur when proteins bind to other substances, of which binding to DNA is a crucial one. Therefore, accurate identification of protein-DNA binding residues is important for further understanding of the protein-DNA interaction \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Li:2023:PMD, author = "Zheng-Wei Li and Qian-Kun Wang and Chang-An Yuan and Peng-Yong Han and Zhu-Hong You and Lei Wang", title = "Predicting {MiRNA-Disease} Associations by Graph Representation Learning Based on Jumping Knowledge Networks", journal = j-TCBB, volume = "20", number = "5", pages = "2629--2638", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3196394", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3196394", abstract = "Growing studies have shown that miRNAs are inextricably linked with many human diseases, and a great deal of effort has been spent on identifying their potential associations. Compared with traditional experimental methods, computational approaches have \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Huang:2023:TDU, author = "Chengxi Huang and Wei Wang and Xin Zhang and Shui-Hua Wang and Yu-Dong Zhang", title = "Tuberculosis Diagnosis Using Deep Transferred {EfficientNet}", journal = j-TCBB, volume = "20", number = "5", pages = "2639--2646", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3199572", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3199572", abstract = "Tuberculosis is a very deadly disease, with more than half of all tuberculosis cases dead in countries and regions with relatively poor health care resources. Fortunately, the disease is curable, and early diagnosis and medication can go a long way toward \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ye:2023:DTI, author = "Qing Ye and Xiaolong Zhang and Xiaoli Lin", title = "Drug-Target Interaction Prediction via Graph Auto-Encoder and Multi-Subspace Deep Neural Networks", journal = j-TCBB, volume = "20", number = "5", pages = "2647--2658", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3206907", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3206907", abstract = "Computational prediction of drug-target interaction (DTI) is important for the new drug discovery. Currently, the deep neural network (DNN) has been widely used in DTI prediction. 
However, parameters of the DNN could be insufficiently trained and features \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wang:2023:PPB, author = "Shanshan Wang and Ruoyou Wu and Cheng Li and Juan Zou and Ziyao Zhang and Qiegen Liu and Yan Xi and Hairong Zheng", title = "{PARCEL}: Physics-Based Unsupervised Contrastive Representation Learning for Multi-Coil {MR} Imaging", journal = j-TCBB, volume = "20", number = "5", pages = "2659--2670", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3213669", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3213669", abstract = "With the successful application of deep learning to magnetic resonance (MR) imaging, parallel imaging techniques based on neural networks have attracted wide attention. However, in the absence of high-quality, fully sampled datasets for training, the \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Lee:2023:CIC, author = "Seokwoo Lee and Wook Lee and Shulei Ren and Byungkyu Park and Kyungsook Han", title = "Constructing Integrative {ceRNA} Networks and Finding Prognostic Biomarkers in Renal Cell Carcinoma", journal = j-TCBB, volume = "20", number = "5", pages = "2671--2680", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3214190", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3214190", abstract = "Inspired by a newly discovered gene regulation mechanism known as competing endogenous RNA (ceRNA) interactions, several computational methods have been proposed to generate ceRNA networks. However, most of these methods have focused on deriving \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Lu:2023:MMA, author = "Xinguo Lu and Guanyuan Chen and Jinxin Li and Xiangjin Hu and Fengxu Sun", title = "{MAGCN}: a Multiple Attention Graph Convolution Networks for Predicting Synthetic Lethality", journal = j-TCBB, volume = "20", number = "5", pages = "2681--2689", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3221736", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3221736", abstract = "Synthetic lethality (SL) is a potential cancer therapeutic strategy and drug discovery. 
Computational approaches to identify synthetic lethality genes have become an effective complement to wet experiments which are time consuming and costly. Graph \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2023:UFC, author = "Qinhu Zhang and Youhong Xu and Siguo Wang and Yong Wu and Yuannong Ye and Chang-An Yuan and Valeriya Gribova and Vladimir Fedorovich Filaretov and De-Shuang Huang", title = "Using Fully Convolutional Network to Locate Transcription Factor Binding Sites Based on {DNA} Sequence and Conservation Information", journal = j-TCBB, volume = "20", number = "5", pages = "2690--2699", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3219831", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3219831", abstract = "Transcription factors (TFs) play a part in gene expression. TFs can form complex gene expression regulation system by combining with DNA. Thereby, identifying the binding regions has become an indispensable step for understanding the regulatory mechanism \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Alatrany:2023:TLC, author = "Abbas Saad Alatrany and Wasiq Khan and Abir J. 
Hussain and Jamila Mustafina and Dhiya Al-Jumeily", title = "Transfer Learning for Classification of {Alzheimer}'s Disease Based on Genome Wide Data", journal = j-TCBB, volume = "20", number = "5", pages = "2700--2711", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3233869", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3233869", abstract = "Alzheimer's disease (AD) is a type of brain disorder that is regarded as a degenerative disease because the corresponding symptoms aggravate with the time progression. Single nucleotide polymorphisms (SNPs) have been identified as relevant \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhao:2023:RDR, author = "Haochen Zhao and Guihua Duan and Peng Ni and Cheng Yan and Yaohang Li and Jianxin Wang", title = "{RNPredATC}: a Deep Residual Learning-Based Model With Applications to the Prediction of {Drug-ATC} Code Association", journal = j-TCBB, volume = "20", number = "5", pages = "2712--2723", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3088256", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3088256", abstract = "The Anatomical Therapeutic Chemical (ATC) classification system, designated by the World Health Organization Collaborating Center (WHOCC), has been widely used in drug screening, repositioning, and similarity research. The ATC classification system \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Xu:2023:IDI, author = "Wuli Xu and Lei Duan and Huiru Zheng and Jesse Li-Ling and Weipeng Jiang and Yidan Zhang and Tingting Wang and Ruiqi Qin", title = "An Integrative Disease Information Network Approach to Similar Disease Detection", journal = j-TCBB, volume = "20", number = "5", pages = "2724--2735", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3110127", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3110127", abstract = "Disease similarity analysis impacts significantly in pathogenesis revealing, treatment recommending, and disease-causing genes predicting. Previous works study the disease similarity based on the semantics obtaining from biomedical ontologies (e.g., \ldots{})", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Sutanto:2023:AGL, author = "Kevin Sutanto and Marcel Turcotte", title = "Assessing Global-Local Secondary Structure Fingerprints to Classify {RNA} Sequences With Deep Learning", journal = j-TCBB, volume = "20", number = "5", pages = "2736--2747", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3118358", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3118358", abstract = "RNA elements that are transcribed but not translated into proteins are called non-coding RNAs (ncRNAs). 
They play wide-ranging roles in biological processes and disorders. Just like proteins, their structure is often intimately linked to their function. \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Peng:2023:MVF, author = "Wei Peng and Ming Liu and Wei Dai and Tielin Chen and Yu Fu and Yi Pan", title = "Multi-View Feature Aggregation for Predicting Microbe-Disease Association", journal = j-TCBB, volume = "20", number = "5", pages = "2748--2758", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3132611", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3132611", abstract = "Microbes play a crucial role in human health and disease. Figuring out the relationship between microbes and diseases leads to significant potential applications in disease treatments. It is an urgent need to devise robust and effective computational \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{BinZaman:2023:ASO, author = "Ahmed {Bin Zaman} and Toki Tahmid Inan and Kenneth {De Jong} and Amarda Shehu", title = "Adaptive Stochastic Optimization to Improve Protein Conformation Sampling", journal = j-TCBB, volume = "20", number = "5", pages = "2759--2771", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3134103", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3134103", abstract = "We have long known that characterizing protein structures structure is key to understanding protein function. Computational approaches have largely addressed a narrow formulation of the problem, seeking to compute one native structure from an amino-acid \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2023:PPG, author = "Xiaoshuai Zhang and Lixin Wang and Hucheng Liu and Xiaofeng Zhang and Bo Liu and Yadong Wang and Junyi Li", title = "{Prot2GO}: Predicting {GO} Annotations From Protein Sequences and Interactions", journal = j-TCBB, volume = "20", number = "5", pages = "2772--2780", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2021.3139841", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2021.3139841", abstract = "Protein is the main material basis of living organisms and plays crucial role in life activities. 
Understanding the function of protein is of great significance for new drug discovery, disease treatment and vaccine development. In recent years, with the \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Chen:2023:PDT, author = "Jiatao Chen and Liang Zhang and Ke Cheng and Bo Jin and Xinjiang Lu and Chao Che", title = "Predicting Drug-Target Interaction Via Self-Supervised Learning", journal = j-TCBB, volume = "20", number = "5", pages = "2781--2789", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3153963", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3153963", abstract = "Recent advances in graph representation learning provide new opportunities for computational drug-target interaction (DTI) prediction. However, it still suffers from deficiencies of dependence on manual labels and vulnerability to attacks. Inspired by the \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Sun:2023:CMD, author = "Jing Sun and Li Pan and Bin Li and Haoyue Wang and Bo Yang and Wenbin Li", title = "A Construction Method of Dynamic Protein Interaction Networks by Using Relevant Features of Gene Expression Data", journal = j-TCBB, volume = "20", number = "5", pages = "2790--2801", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3264241", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3264241", abstract = "Essential proteins play an important role in various life activities and are considered to be a vital part of the organism. Gene expression data are an important dataset to construct dynamic protein-protein interaction networks (DPIN). The existing \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Chu:2023:NBB, author = "He-Ming Chu and Xiang-Zhen Kong and Jin-Xing Liu and Chun-Hou Zheng and Han Zhang", title = "A New Binary Biclustering Algorithm Based on Weight Adjacency Difference Matrix for Analyzing Gene Expression Data", journal = j-TCBB, volume = "20", number = "5", pages = "2802--2809", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3283801", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3283801", abstract = "Biclustering algorithms are essential for processing gene expression data. 
However, to process the dataset, most biclustering algorithms require preprocessing the data matrix into a binary matrix. Regrettably, this type of preprocessing may introduce \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Sheng:2023:SCM, author = "Nan Sheng and Lan Huang and Ling Gao and Yangkun Cao and Xuping Xie and Yan Wang", title = "A Survey of Computational Methods and Databases for {lncRNA-MiRNA} Interaction Prediction", journal = j-TCBB, volume = "20", number = "5", pages = "2810--2826", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3264254", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3264254", abstract = "Long non-coding RNAs (lncRNAs) and microRNAs (miRNAs) are two prevalent non-coding RNAs in current research. They play critical regulatory roles in the life processes of animals and plants. Studies have shown that lncRNAs can interact with miRNAs to \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Lv:2023:AMC, author = "Ji Lv and Guixia Liu and Yuan Ju and Houhou Huang and Ying Sun", title = "{AADB}: a Manually Collected Database for Combinations of Antibiotics With Adjuvants", journal = j-TCBB, volume = "20", number = "5", pages = "2827--2836", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3283221", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3283221", abstract = "Antimicrobial resistance is a global public health concern. The lack of innovations in antibiotic development has led to renewed interest in antibiotic adjuvants. However, there is no database to collect antibiotic adjuvants. Herein, we build a \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Madadi:2023:AIM, author = "Yeganeh Madadi and Aboozar Monavarfeshani and Hao Chen and W. Daniel Stamer and Robert W. 
Williams and Siamak Yousefi", title = "Artificial Intelligence Models for Cell Type and Subtype Identification Based on Single-Cell {RNA} Sequencing Data in Vision Science", journal = j-TCBB, volume = "20", number = "5", pages = "2837--2852", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3284795", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3284795", abstract = "Single-cell RNA sequencing (scRNA-seq) provides a high throughput, quantitative and unbiased framework for scientists in many research fields to identify and characterize cell types within heterogeneous cell populations from various tissues. However, \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Gao:2023:CCN, author = "Zhen Gao and Jin Tang and Junfeng Xia and Chun-Hou Zheng and Pi-Jing Wei", title = "{CNNGRN}: a Convolutional Neural Network-Based Method for Gene Regulatory Network Inference From Bulk Time-Series Expression Data", journal = j-TCBB, volume = "20", number = "5", pages = "2853--2861", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3282212", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3282212", abstract = "Gene regulatory networks (GRNs) participate in many biological processes, and reconstructing them plays an important role in systems biology. 
Although many advanced methods have been proposed for GRN reconstruction, their predictive performance is far \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Cao:2023:CAM, author = "Yu Cao and Wenya Pi and Chun-Yu Lin and Ulrike M{\"u}nzner and Masahiro Ohtomo and Tatsuya Akutsu", title = "Common Attractors in Multiple {Boolean} Networks", journal = j-TCBB, volume = "20", number = "5", pages = "2862--2873", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3268795", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3268795", abstract = "Analyzing multiple networks is important to understand relevant features among different networks. Although many studies have been conducted for that purpose, not much attention has been paid to the analysis of attractors (i.e., steady states) in multiple \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wei:2023:CMB, author = "Ze-Gang Wei and Xu Chen and Xiao-Dan Zhang and Hao Zhang and Xing-Guo Fan and Hong-Yan Gao and Fei Liu and Yu Qian", title = "Comparison of Methods for Biological Sequence Clustering", journal = j-TCBB, volume = "20", number = "5", pages = "2874--2888", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3253138", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3253138", abstract = "Recent advances in sequencing technology have considerably promoted genomics research by providing high-throughput sequencing economically. This great advancement has resulted in a huge amount of sequencing data. Clustering analysis is powerful to study \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Romashchenko:2023:CPM, author = "Nikolai Romashchenko and Benjamin Linard and Eric Rivals and Fabio Pardi", title = "Computing Phylo-$k$-Mers", journal = j-TCBB, volume = "20", number = "5", pages = "2889--2897", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3278049", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3278049", abstract = "Finding the correct position of new sequences within an established phylogenetic tree is an increasingly relevant problem in evolutionary bioinformatics and metagenomics. 
Recently, alignment-free approaches for this task have been proposed. One such \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Liu:2023:CPC, author = "Niannian Liu and Zequn Zhang and Yanan Wu and Yinglong Wang and Ying Liang", title = "{CRBSP}: Prediction of {CircRNA--RBP} Binding Sites Based on Multimodal Intermediate Fusion", journal = j-TCBB, volume = "20", number = "5", pages = "2898--2906", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3272400", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3272400", abstract = "Circular RNA (CircRNA) is widely expressed and has physiological and pathological significance, regulating post-transcriptional processes via its protein-binding activity. However, whereas much work has been done on linear RNA and RNA binding protein (RBP). \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2023:DPP, author = "Fan Zhang and Yawei Zhang and Xiaoke Zhu and Xiaopan Chen and Fuhao Lu and Xinhong Zhang", title = "{DeepSG2PPI}: a Protein-Protein Interaction Prediction Method Based on Deep Learning", journal = j-TCBB, volume = "20", number = "5", pages = "2907--2919", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3268661", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3268661", abstract = "Protein-protein interaction (PPI) plays an important role in almost all life activities. Many protein interaction sites have been confirmed by biological experiments, but these PPI site identification methods are time-consuming and expensive. In this \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ravikumar:2023:EIS, author = "Visweswaran Ravikumar and Tong Xu and Wajd N. 
Al-Holou and Salar Fattahi and Arvind Rao", title = "Efficient Inference of Spatially-Varying {Gaussian} {Markov} Random Fields With Applications in Gene Regulatory Networks", journal = j-TCBB, volume = "20", number = "5", pages = "2920--2932", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3282028", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3282028", abstract = "In this paper, we study the problem of inferring spatially-varying Gaussian Markov random fields (SV-GMRF) where the goal is to learn a network of sparse, context-specific GMRFs representing network relationships between genes. An important application of \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhao:2023:EBR, author = "Bo Zhao and Jun Bai and Chen Li and Jianfei Zhang and Wenge Rong and Yuanxin Ouyang and Zhang Xiong", title = "Enhancing Biomedical {ReQA} With Adversarial Hard In-Batch Negative Samples", journal = j-TCBB, volume = "20", number = "5", pages = "2933--2944", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3261315", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:49 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3261315", abstract = "Question answering (QA) plays a vital role in biomedical natural language processing. Among question answering tasks, the retrieval question answering (ReQA) aims to directly retrieve the correct answer from candidates and has attracted much attention in \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. 
Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Cai:2023:GEI, author = "Zhipeng Cai and Min Li and Pavel Skums and Yanjie Wei", title = "{Guest Editors}' Introduction to the Special Section on Bioinformatics Research and Applications", journal = j-TCBB, volume = "20", number = "6", pages = "3329--3331", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3325032", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3325032", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Lee:2023:EVI, author = "Hunmin Lee and Mingon Kang and Donghyun Kim and Daehee Seo and Yingshu Li", title = "Epidemic Vulnerability Index for Effective Vaccine Distribution Against Pandemic", journal = j-TCBB, volume = "20", number = "6", pages = "3332--3342", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3198365", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3198365", abstract = "COVID-19 vaccine distribution route directly impacts the community's mortality and infection rate. Therefore, optimal vaccination dissemination would appreciably lower the death and infection rates. This paper proposes the Epidemic Vulnerability \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhao:2023:KGF, author = "Qing Zhao and Jianqiang Li and Linna Zhao and Zhichao Zhu", title = "Knowledge Guided Feature Aggregation for the Prediction of Chronic Obstructive Pulmonary Disease With {Chinese} {EMRs}", journal = j-TCBB, volume = "20", number = "6", pages = "3343--3352", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3198798", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3198798", abstract = "The automatic disease diagnosis utilizing clinical data has been suffering from the issues of feature sparse and high probability of missing values. Since the graph neural network is an effective tool to model the structural information and infer the \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wang:2023:MDA, author = "Yueyue Wang and Xiujuan Lei and Yi Pan", title = "Microbe-Disease Association Prediction Using {RGCN} Through Microbe-Drug-Disease Network", journal = j-TCBB, volume = "20", number = "6", pages = "3353--3362", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3247035", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3247035", abstract = "Accumulating evidence has shown that microbes play significant roles in human health and diseases. Therefore, identifying microbe-disease associations is conducive to disease prevention. 
In this article, a predictive method called TNRGCN is designed for \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Peng:2023:PMD, author = "Wei Peng and Zicheng Che and Wei Dai and Shoulin Wei and Wei Lan", title = "Predicting {miRNA}-Disease Associations From {miRNA}-Gene-Disease Heterogeneous Network With Multi-Relational Graph Convolutional Network Model", journal = j-TCBB, volume = "20", number = "6", pages = "3363--3375", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3187739", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3187739", abstract = "MiRNAs are reported to be linked to the pathogenesis of human complex diseases. Disease-related miRNAs may serve as novel bio-marks and drug targets. This work focuses on designing a multi-relational Graph Convolutional Network model to predict miRNA-. \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ali:2023:EAK, author = "Sarwan Ali and Bikram Sahoo and Muhammad Asad Khan and Alexander Zelikovsky and Imdad Ullah Khan and Murray Patterson", title = "Efficient Approximate Kernel Based Spike Sequence Classification", journal = j-TCBB, volume = "20", number = "6", pages = "3376--3388", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3206284", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3206284", abstract = "Machine learning (ML) models, such as SVM, for tasks like classification and clustering of sequences, require a definition of distance/similarity between pairs of sequences. Several methods have been proposed to compute the similarity between sequences, \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Guan:2023:RRN, author = "Yuxia Guan and Ying An and Jingrui Xu and Ning Liu and Jianxin Wang", title = "{HA-ResNet}: Residual Neural Network With Hidden Attention for {ECG} Arrhythmia Detection Using Two-Dimensional Signal", journal = j-TCBB, volume = "20", number = "6", pages = "3389--3398", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3198998", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3198998", abstract = "Arrhythmia is an abnormal heart rhythm, a common clinical problem in cardiology. 
Long-term or severe arrhythmia may lead to stroke and sudden cardiac death. The electrocardiogram (ECG) is the most commonly used tool to diagnose arrhythmia. However, the \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Li:2023:DCU, author = "Qiang Li and Hong Song and Zenghui Wei and Fengbo Yang and Jingfan Fan and Danni Ai and Yucong Lin and Xiaoling Yu and Jian Yang", title = "Densely Connected {U-Net} With Criss-Cross Attention for Automatic Liver Tumor Segmentation in {CT} Images", journal = j-TCBB, volume = "20", number = "6", pages = "3399--3410", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3198425", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3198425", abstract = "Automatic liver tumor segmentation plays a key role in radiation therapy of hepatocellular carcinoma. In this paper, we propose a novel densely connected U-Net model with criss-cross attention (CC-DenseUNet) to segment liver tumors in computed tomography \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Lv:2023:TTB, author = "Zhilong Lv and Yuexiao Lin and Rui Yan and Ying Wang and Fa Zhang", title = "{TransSurv}: Transformer-Based Survival Analysis Model Integrating Histopathological Images and Genomic Data for Colorectal Cancer", journal = j-TCBB, volume = "20", number = "6", pages = "3411--3420", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3199244", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3199244", abstract = "Survival analysis is a significant study in cancer prognosis, and the multi-modal data, including histopathological images, genomic data, and clinical information, provides unprecedented opportunities for its development. However, because of the high \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Chen:2023:CAS, author = "Cheng Chen and Yuguo Zha and Daming Zhu and Kang Ning and Xuefeng Cui", title = "{ContactLib-ATT}: a Structure-Based Search Engine for Homologous Proteins", journal = j-TCBB, volume = "20", number = "6", pages = "3421--3429", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3197802", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3197802", abstract = "General-purpose protein structure embedding can be used for many important protein biology tasks, such as protein design, drug design and binding affinity prediction. Recent researches have shown that attention-based encoder layers are more suitable to \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Chen:2023:AAM, author = "Jiamin Chen and Jianliang Gao and Tengfei Lyu and Babatounde Moctard Oloulade and Xiaohua Hu", title = "{AutoMSR}: Auto Molecular Structure Representation Learning for Multi-label Metabolic Pathway Prediction", journal = j-TCBB, volume = "20", number = "6", pages = "3430--3439", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3198119", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3198119", abstract = "It is significant to comprehend the relationship between metabolic pathway and molecular pathway for synthesizing new molecules, for instance optimizing drug metabolization. In bioinformatics fields, multi-label prediction of metabolic pathways is a \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Li:2023:GSI, author = "Hong-Dong Li and Chao Deng and Xiao-Qi Zhang and Cui-Xiang Lin", title = "A Gene Set-Integrated Approach for Predicting Disease-Associated Genes", journal = j-TCBB, volume = "20", number = "6", pages = "3440--3450", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3214517", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3214517", abstract = "It is important to identify disease-associated genes for studying the pathogenic mechanism of complex diseases. 
Recently, models for disease gene prediction are dominantly based on molecular expression data and networks, including gene expression, protein \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Fu:2023:DRD, author = "Haitao Fu and Cecheng Zhao and Xiaohui Niu and Wen Zhang", title = "{DRLM}: a Robust Drug Representation Learning Method and its Applications", journal = j-TCBB, volume = "20", number = "6", pages = "3451--3460", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3213979", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3213979", abstract = "Learning representations from data is a fundamental step for machine learning. High-quality and robust drug representations can broaden the understanding of pharmacology, and improve the modeling of multiple drug-related prediction tasks, which further \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Qian:2023:PCM, author = "Yurong Qian and Jingjing Zheng and Ying Jiang and Shaoqiu Li and Lei Deng", title = "Prediction of {circRNA-MiRNA} Association Using Singular Value Decomposition and Graph Neural Networks", journal = j-TCBB, volume = "20", number = "6", pages = "3461--3468", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3222777", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3222777", abstract = "A large number of experimental studies have shown that circRNAs can act as molecular sponges of microRNAs, interacting with miRNAs to regulate gene expression levels, thereby affecting the development of human diseases. Exploring the potential \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Cheng:2023:DNC, author = "Enhao Cheng and Jun Zhao and Hong Wang and Shuguang Song and Shuxian Xiong and Yanshen Sun", title = "Dual Network Contrastive Learning for Predicting Microbe-Disease Associations", journal = j-TCBB, volume = "20", number = "6", pages = "3469--3481", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3228617", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3228617", abstract = "Predicting microbe-disease associations is crucial for demystifying the causes of diseases and preventing them proactively. 
However, most of existing approaches are feeble to comprehensively investigate the interactive relationships between diseases and \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Gong:2023:SNS, author = "Tiansu Gong and Fusong Ju and Shiwei Sun and Dongbo Bu", title = "{SASA-Net}: a Spatial-Aware Self-Attention Mechanism for Building Protein {$3$D} Structure Directly From Inter-Residue Distances", journal = j-TCBB, volume = "20", number = "6", pages = "3482--3488", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3240456", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3240456", abstract = "Protein functions are tightly related to the fine details of their 3D structures. To understand protein structures, computational prediction approaches are highly needed. Recently, protein structure prediction has achieved considerable progresses mainly \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wang:2023:MPM, author = "Huan Wang and Ruigang Liu and Baijing Wang and Yifan Hong and Ziwen Cui and Qiufen Ni", title = "Multitype Perception Method for Drug-Target Interaction Prediction", journal = j-TCBB, volume = "20", number = "6", pages = "3489--3498", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3285042", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3285042", abstract = "With the growing popularity of artificial intelligence in drug discovery, many deep-learning technologies have been used to automatically predict unknown drug-target interactions (DTIs). A unique challenge in using these technologies to predict DTI is \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Cao:2023:IMB, author = "Qingqing Cao and Jianping Zhao and Haiyun Wang and Qi Guan and Chunhou Zheng", title = "An Integrated Method Based on {Wasserstein} Distance and Graph for Cancer Subtype Discovery", journal = j-TCBB, volume = "20", number = "6", pages = "3499--3510", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3293472", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3293472", abstract = "Due to the complexity of cancer pathogenesis at different omics levels, it is necessary to find a comprehensive method to accurately distinguish and find cancer subtypes for cancer treatment. In this paper, we proposed a new cancer multi-omics subtype \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Mondal:2023:GDG, author = "Abhijit Mondal and Mukul S. Bansal", title = "Generalizing the Domain-Gene-Species Reconciliation Framework to Microbial Genes and Domains", journal = j-TCBB, volume = "20", number = "6", pages = "3511--3522", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3294480", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3294480", abstract = "Protein domains play an important role in the function and evolution of many gene families. Previous studies have shown that domains are frequently lost or gained during gene family evolution. 
Yet, most computational approaches for studying gene family \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2023:NBD, author = "Han Zhang and Zexuan Zhu and Hui Li and Shan He", title = "Network Biomarker Detection From Gene Co-Expression Network Using {Gaussian} Mixture Model Clustering", journal = j-TCBB, volume = "20", number = "6", pages = "3523--3534", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3297388", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3297388", abstract = "Finding network biomarkers from gene co-expression networks (GCNs) has attracted a lot of research interest. A network biomarker is a topological module, i.e., a group of densely connected nodes in a GCN, in which the gene expression values correlate with \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wu:2023:MVC, author = "Wenming Wu and Wensheng Zhang and Weimin Hou and Xiaoke Ma", title = "Multi-View Clustering With Graph Learning for {scRNA-Seq} Data", journal = j-TCBB, volume = "20", number = "6", pages = "3535--3546", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3298334", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3298334", abstract = "Advances in single-cell biotechnologies have generated the single-cell RNA sequencing (scRNA-seq) of gene expression profiles at cell levels, providing an opportunity to study cellular distribution. Although significant efforts developed in their analysis, \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Liu:2023:HPL, author = "Dayun Liu and Xianghui Li and Liangliang Zhang and Xiaowen Hu and Jiaxuan Zhang and Zhirong Liu and Lei Deng", title = "{HGNNLDA}: Predicting {lncRNA-Drug} Sensitivity Associations via a Dual Channel Hypergraph Neural Network", journal = j-TCBB, volume = "20", number = "6", pages = "3547--3555", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3302468", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3302468", abstract = "Drug sensitivity is critical for enabling personalized treatment.
Many studies have shown that long non-coding RNAs (lncRNAs) are closely related to drug sensitivity because lncRNAs can regulate genes related to drug sensitivity to affect drug efficacy. \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yang:2023:GCG, author = "Jing Yang and Zhengshu Lu and Xu Chen and Deling Xu and Dewu Ding and Yanrui Ding", title = "{GCNA-Cluster}: a Gene Co-Expression Network Alignment to Cluster Cancer Patients Algorithm for Identifying Subtypes of Pancreatic Ductal Adenocarcinoma", journal = j-TCBB, volume = "20", number = "6", pages = "3556--3566", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3300102", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3300102", abstract = "Cancer heterogeneity makes it necessary to use different treatment strategies for patients with the same pathological features. Accurate identification of cancer subtypes is a crucial step in this approach. The current studies of pancreatic ductal \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Jia:2023:PMP, author = "Baoli Jia and Qingfang Meng and Yuehui Chen and Hongri Yang", title = "Prediction of Membrane Protein Amphiphilic Helix Based on Horizontal Visibility Graph and Graph Convolution Network", journal = j-TCBB, volume = "20", number = "6", pages = "3567--3574", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3305493", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3305493", abstract = "Membrane protein amphiphilic helices play an important role in many biological processes. Based on the graph convolution network and the horizontal visibility graph the prediction method of membrane protein amphiphilic helix structure is proposed in this \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Saheed:2023:MGE, author = "Yakub K. Saheed and Bukola F. Balogun and Braimah Joseph Odunayo and Mustapha Abdulsalam", title = "Microarray Gene Expression Data Classification via {Wilcoxon} Sign Rank Sum and Novel Grey Wolf Optimized Ensemble Learning Models", journal = j-TCBB, volume = "20", number = "6", pages = "3575--3587", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3305429", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3305429", abstract = "Cancer is a deadly disease that affects the lives of people all over the world. 
Finding a few genes relevant to a single cancer disease can lead to effective treatments. The difficulty with microarray datasets is their high dimensionality; they have a \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Gong:2023:CLE, author = "Yinyin Gong and Rui Li and Bin Fu and Yan Liu and Jilong Wang and Renfa Li and Danny Z. Chen", title = "A {CNN-LSTM} Ensemble Model for Predicting Protein-Protein Interaction Binding Sites", journal = j-TCBB, volume = "20", number = "6", pages = "3588--3599", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3306948", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3306948", abstract = "Proteins commonly perform biological functions through protein-protein interactions (PPIs). The knowledge of PPI sites is imperative for the understanding of protein functions, disease mechanisms, and drug design. Traditional biological experimental \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wang:2023:MPI, author = "Xin Wang and Jie Li and Guohua Wang", title = "{MicroRNA} Promoter Identification in Human With a Three-level Prediction Method", journal = j-TCBB, volume = "20", number = "6", pages = "3600--3608", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3305992", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3305992", abstract = "The accurate annotation of miRNA promoters is critical for the mechanistic understanding of miRNA gene regulation. Various computational methods have been developed for the prediction of miRNA promoters solely employing a single classifier. Most of these \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Cao:2023:MMP, author = "Yahui Cao and Tao Zhang and Xin Zhao and Xue Jia and Bingzhi Li", title = "{MooSeeker}: a Metabolic Pathway Design Tool Based on Multi-Objective Optimization Algorithm", journal = j-TCBB, volume = "20", number = "6", pages = "3609--3622", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3307363", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3307363", abstract = "Recently, metabolic pathway design has attracted considerable attention and become an increasingly important area in metabolic engineering. 
Manual or computational methods have been introduced to retrieve the metabolic pathway. These methods model \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Chen:2023:TIT, author = "Taoning Chen and Tingfang Wu and Deng Pan and Jinxing Xie and Jin Zhi and Xuejiao Wang and Lijun Quan and Qiang Lyu", title = "{TransRNAm}: Identifying Twelve Types of {RNA} Modifications by an Interpretable Multi-Label Deep Learning Model Based on Transformer", journal = j-TCBB, volume = "20", number = "6", pages = "3623--3634", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3307419", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3307419", abstract = "Accurate identification of RNA modification sites is of great significance in understanding the functions and regulatory mechanisms of RNAs. Recent advances have shown great promise in applying computational methods based on deep learning for accurate \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhao:2023:EFP, author = "Weizhong Zhao and Wenjie Yao and Xingpeng Jiang and Tingting He and Chuan Shi and Xiaohua Hu", title = "An Explainable Framework for Predicting Drug-Side Effect Associations via Meta-Path-Based Feature Learning in Heterogeneous Information Network", journal = j-TCBB, volume = "20", number = "6", pages = "3635--3647", year = "2023", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3308094", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Tue Mar 19 08:33:52 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3308094", abstract = "Side effects of drugs have gained increasing attention in the biomedical field, and accurate identification of drug side effects is essential for drug development and drug safety surveillance. Although the traditional pharmacological experiments can \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{ElZein:2024:PCP, author = "Yamane {El Zein} and Mathieu Lemay and K{\'e}vin Huguenin", title = "{PrivaTree}: Collaborative Privacy-Preserving Training of Decision Trees on Biomedical Data", journal = j-TCBB, volume = "21", number = "1", pages = "1--13", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3286274", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 31 09:09:21 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3286274", abstract = "Biomedical data generation and collection have become faster and more ubiquitous. 
Consequently, datasets are increasingly spread across hospitals, research institutions, or other entities. Exploiting such distributed datasets simultaneously can be \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Tu:2024:DMD, author = "Chao Tu and Denghui Du and Tieyong Zeng and Yu Zhang", title = "Deep Multi-Dictionary Learning for Survival Prediction With Multi-Zoom Histopathological Whole Slide Images", journal = j-TCBB, volume = "21", number = "1", pages = "14--25", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3321593", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 31 09:09:21 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3321593", abstract = "Survival prediction based on histopathological whole slide images (WSIs) is of great significance for risk-benefit assessment and clinical decision. However, complex microenvironments and heterogeneous tissue structures in WSIs bring challenges to \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Morris:2024:EDA, author = "Jordan Morris and Ashur Rafiev and Graeme M. Bragg and Mark L. Vousden and David B. Thomas and Alex Yakovlev and Andrew D. 
Brown", title = "An Event-Driven Approach to Genotype Imputation on a Custom {RISC-V} Cluster", journal = j-TCBB, volume = "21", number = "1", pages = "26--35", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3328714", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 31 09:09:21 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/risc-v.bib; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3328714", abstract = "This article proposes an event-driven solution to genotype imputation, a technique used to statistically infer missing genetic markers in DNA. The work implements the widely accepted Li and Stephens model, primary contributor to the computational \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Choudhuri:2024:CPP, author = "Souradipto Choudhuri and Keya Sau", title = "{CodonU}: a {Python} Package for Codon Usage Analysis", journal = j-TCBB, volume = "21", number = "1", pages = "36--44", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3335823", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 31 09:09:21 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/python.bib; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3335823", abstract = "Codon Usage Analysis (CUA) has been accompanied by several web servers and independent programs written in several programming languages. Also this diversity speaks for the need of a reusable software that can be helpful in reading, manipulating and \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yu:2024:MRG, author = "Shengpeng Yu and Hong Wang and Jing Li and Jun Zhao and Cheng Liang and Yanshen Sun", title = "A Multi-Relational Graph Encoder Network for Fine-Grained Prediction of {MiRNA}-Disease Associations", journal = j-TCBB, volume = "21", number = "1", pages = "45--56", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3335007", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 31 09:09:21 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3335007", abstract = "MicroRNAs (miRNAs) are critical in diagnosing and treating various diseases. Automatically demystifying the interdependent relationships between miRNAs and diseases has recently made remarkable progress, but their fine-grained interactive relationships \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Guo:2024:GBF, author = "Rui Guo and Xu Tian and Hanhe Lin and Stephen McKenna and Hong-Dong Li and Fei Guo and Jin Liu", title = "Graph-Based Fusion of Imaging, Genetic and Clinical Data for Degenerative Disease Diagnosis", journal = j-TCBB, volume = "21", number = "1", pages = "57--68", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3335369", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 31 09:09:21 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3335369", abstract = "Graph learning methods have achieved noteworthy performance in disease diagnosis due to their ability to represent unstructured information such as inter-subject relationships. While it has been shown that imaging, genetic and clinical data are crucial \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Noland:2024:ESP, author = "Jonas Kristiansen N{\o}land and Steinar Thorvaldsen", title = "The Exact Stochastic Process of the Haploid Multi-Allelic {Wright--Fisher} Mutation Model", journal = j-TCBB, volume = "21", number = "1", pages = "69--83", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3336850", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 31 09:09:21 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3336850", abstract = "Diffusion models are widely applied in population genetics, but their approximate solutions may not accurately capture the exact stochastic process. 
Nevertheless, this practice was necessary due to computing limitations, particularly for large \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Tang:2024:MNM, author = "Wenliang Tang and Zhaohong Deng and Hanwen Zhou and Wei Zhang and Fuping Hu and Kup-Sze Choi and Shitong Wang", title = "{MVDINET}: a Novel Multi-Level Enzyme Function Predictor With Multi-View Deep Interactive Learning", journal = j-TCBB, volume = "21", number = "1", pages = "84--94", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3337158", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 31 09:09:21 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3337158", abstract = "As a class of extremely significant of biocatalysts, enzymes play an important role in the process of biological reproduction and metabolism. Therefore, the prediction of enzyme function is of great significance in biomedicine fields. Recently, \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Dong:2024:SCA, author = "Shujie Dong and Yuansheng Liu and Yongshun Gong and Xiangjun Dong and Xiangxiang Zeng", title = "{scCAN}: Clustering With Adaptive Neighbor-Based Imputation Method for Single-Cell {RNA-Seq} Data", journal = j-TCBB, volume = "21", number = "1", pages = "95--105", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3337231", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 31 09:09:21 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3337231", abstract = "Single-cell RNA sequencing (scRNA-seq) is widely used to study cellular heterogeneity in different samples. However, due to technical deficiencies, dropout events often result in zero gene expression values in the gene expression matrix. In this paper, we \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Rossi:2024:IMC, author = "Nicol{\`o} Rossi and Nicola Gigante and Nicola Vitacolonna and Carla Piazza", title = "Inferring {Markov} Chains to Describe Convergent Tumor Evolution With {CIMICE}", journal = j-TCBB, volume = "21", number = "1", pages = "106--119", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3337258", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 31 09:09:21 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3337258", abstract = "The field of tumor phylogenetics focuses on studying the differences within cancer cell populations. 
Many efforts are done within the scientific community to build cancer progression models trying to understand the heterogeneity of such diseases. These \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Li:2024:PDD, author = "Dandan Li and Zhen Xiao and Han Sun and Xingpeng Jiang and Weizhong Zhao and Xianjun Shen", title = "Prediction of Drug--Disease Associations Based on Multi-Kernel Deep Learning Method in Heterogeneous Graph Embedding", journal = j-TCBB, volume = "21", number = "1", pages = "120--128", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3339189", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 31 09:09:21 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3339189", abstract = "Computational drug repositioning can identify potential associations between drugs and diseases. This technology has been shown to be effective in accelerating drug development and reducing experimental costs. Although there has been plenty of research \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yu:2024:MNM, author = "Changyong Yu and Yuhai Zhao and Chu Zhao and Jianyu Jin and Keming Mao and Guoren Wang", title = "{MiniDBG}: a Novel and Minimal {de Bruijn} Graph for Read Mapping", journal = j-TCBB, volume = "21", number = "1", pages = "129--142", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3340251", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 31 09:09:21 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3340251", abstract = "The De Bruijn graph (DBG) has been widely used in the algorithms for indexing or organizing read and reference sequences in bioinformatics. However, a DBG model that can locate each node, edge and path on sequence has not been proposed so far. Recently, \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wang:2024:SMS, author = "Wei Wang and Mengxue Yu and Bin Sun and Juntao Li and Dong Liu and Hongjun Zhang and Xianfang Wang and Yun Zhou", title = "{SMGCN}: Multiple Similarity and Multiple Kernel Fusion Based Graph Convolutional Neural Network for Drug-Target Interactions Prediction", journal = j-TCBB, volume = "21", number = "1", pages = "143--154", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3339645", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 31 09:09:21 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3339645", abstract = "Accurately identifying potential drug-target interactions (DTIs) is a critical step in accelerating drug discovery. Despite many studies that have been conducted over the past decades, detecting DTIs remains a highly challenging and complicated process. \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Sinha:2024:ESA, author = "Rituparna Sinha and Rajat Kumar Pal and Rajat K. 
De", title = "{ENLIGHTENMENT}: a Scalable Annotated Database of Genomics and {NGS}-Based Nucleotide Level Profiles", journal = j-TCBB, volume = "21", number = "1", pages = "155--168", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3340067", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 31 09:09:21 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3340067", abstract = "The revolution in sequencing technologies has enabled human genomes to be sequenced at a very low cost and time leading to exponential growth in the availability of whole-genome sequences. However, the complete understanding of our genome and its \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Barnett:2024:GML, author = "Eric J. Barnett and Daniel G. Onete and Asif Salekin and Stephen V. Faraone", title = "Genomic Machine Learning Meta-regression: Insights on Associations of Study Features With Reported Model Performance", journal = j-TCBB, volume = "21", number = "1", pages = "169--177", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3343808", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 31 09:09:21 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3343808", abstract = "Many studies have been conducted with the goal of correctly predicting diagnostic status of a disorder using the combination of genomic data and machine learning. It is often hard to judge which components of a study led to better results and whether \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Liu:2024:NMS, author = "Yuerui Liu and Yongquan Jiang and Fan Zhang and Yan Yang", title = "A Novel Multi-Scale Graph Neural Network for Metabolic Pathway Prediction", journal = j-TCBB, volume = "21", number = "1", pages = "178--187", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3345647", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 31 09:09:21 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3345647", abstract = "Predicting the metabolic pathway classes of compounds in the human body is an important problem in drug research and development. For this purpose, we propose a Multi-Scale Graph Neural Network framework, named MSGNN. The framework includes a subgraph \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Xin:2024:BLH, author = "Junchang Xin and Mingcan Wang and Luxuan Qu and Qi Chen and Weiyiqi Wang and Zhiqiong Wang", title = "{BIC-LP}: a Hybrid Higher-Order Dynamic {Bayesian} Network Score Function for Gene Regulatory Network Reconstruction", journal = j-TCBB, volume = "21", number = "1", pages = "188--199", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3345317", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 31 09:09:21 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3345317", abstract = "Reconstructing gene regulatory networks (GRNs) is an increasingly hot topic in bioinformatics.
Dynamic Bayesian network (DBN) is a stochastic graph model commonly used as a vital model for GRN reconstruction. But probabilistic characteristics of biological \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zeng:2024:SIT, author = "Pengcheng Zeng and Zhixiang Lin", title = "{scICML}: Information-Theoretic Co-Clustering-Based Multi-View Learning for the Integrative Analysis of Single-Cell Multi-Omics Data", journal = j-TCBB, volume = "21", number = "1", pages = "200--207", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3305989", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 31 09:09:21 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3305989", abstract = "Modern high-throughput sequencing technologies have enabled us to profile multiple molecular modalities from the same single cell, providing unprecedented opportunities to assay cellular heterogeneity from multiple biological layers. However, the datasets \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Nagda:2024:PHP, author = "Bindi M. Nagda and Van Minh Nguyen and Ryan T.
White", title = "{promSEMBLE}: Hard Pattern Mining and Ensemble Learning for Detecting {DNA} Promoter Sequences", journal = j-TCBB, volume = "21", number = "1", pages = "208--214", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3339597", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri May 31 09:09:21 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3339597", abstract = "Accurate identification of DNA promoter sequences is of crucial importance in unraveling the underlying mechanisms that regulate gene transcription. Initiation of transcription is controlled through regulatory transcription factors binding to promoter \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Cruz:2024:BOO, author = "Fernando Cruz and Jo{\~a}o Capela and Eug{\'e}nio C. Ferreira and Miguel Rocha and Oscar Dias", title = "{BioISO}: an Objective-Oriented Application for Assisting the Curation of Genome-Scale Metabolic Models", journal = j-TCBB, volume = "21", number = "2", pages = "215--226", month = mar, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3339972", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Aug 22 12:16:12 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3339972", abstract = "As the reconstruction of Genome-Scale Metabolic Models (GEMs) becomes standard practice in systems biology, the number of organisms having at least one metabolic model is peaking at an unprecedented scale. The automation of laborious tasks, such as gap-. \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2024:GIK, author = "Hao Zhang and Jiao Jiao and Tianheng Zhao and Enshuang Zhao and Lanhui Li and Guihua Li and Borui Zhang and Qing-Ming Qin", title = "{GERWR}: Identifying the Key Pathogenicity-Associated {sRNAs} of \bioname{Magnaporthe oryzae} Infection in Rice Based on Graph Embedding and Random Walk With Restart", journal = j-TCBB, volume = "21", number = "2", pages = "227--239", month = mar, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3348080", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Aug 22 12:16:12 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3348080", abstract = "Rice blast, caused by \bioname{Magnaporthe oryzae} (\bioname{M. oryzae}), is a destructive rice disease that reduces rice yield by 10\% to 30\% annually. It also affects other cereal crops such as barley, wheat, rye, millet, \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Manu:2024:GFG, author = "Daniel Manu and Jingjing Yao and Wuji Liu and Xiang Sun", title = "{GraphGANFed}: a Federated Generative Framework for Graph-Structured Molecules Towards Efficient Drug Discovery", journal = j-TCBB, volume = "21", number = "2", pages = "240--253", month = mar, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3349990", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Aug 22 12:16:12 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3349990", abstract = "Recent advances in deep learning have accelerated its use in various applications, such as cellular image analysis and molecular discovery. In molecular discovery, a generative adversarial network (GAN), which comprises a discriminator to distinguish \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Hu:2024:NNO, author = "Zhao-Qi Hu and Yuan-Mao Hung and Li-Han Chen and Liang-Chuan Lai and Min-Hsiung Pan and Eric Y. 
Chuang and Mong-Hsun Tsai", title = "{NURECON}: a Novel Online System for Determining Nutrition Requirements Based on Microbial Composition", journal = j-TCBB, volume = "21", number = "2", pages = "254--264", month = mar, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3349572", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Aug 22 12:16:12 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3349572", abstract = "Dietary habits have been proven to have an impact on the microbial composition and health of the human gut. Over the past decade, researchers have discovered that gut microbiota can use nutrients to produce metabolites that have major implications for \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Jin:2024:SSS, author = "Sichen Jin and Yijia Zhang and Huimin Yu and Mingyu Lu", title = "{SADR}: Self-Supervised Graph Learning With Adaptive Denoising for Drug Repositioning", journal = j-TCBB, volume = "21", number = "2", pages = "265--277", month = mar, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3351079", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Aug 22 12:16:12 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3351079", abstract = "Traditional drug development is often high-risk and time-consuming. A promising alternative is to reuse or relocate approved drugs. Recently, some methods based on graph representation learning have started to be used for drug repositioning. These models \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Leuchtenberger:2024:LAN, author = "Alina F. Leuchtenberger and Arndt von Haeseler", title = "Learning From an Artificial Neural Network in Phylogenetics", journal = j-TCBB, volume = "21", number = "2", pages = "278--288", month = mar, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3352268", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Aug 22 12:16:12 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3352268", abstract = "We show that an iterative ansatz of deep learning and human intelligence guided simplification may lead to surprisingly simple solutions for a difficult problem in phylogenetics. Distinguishing Farris and Felsenstein trees is a longstanding problem in \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wang:2024:LGN, author = "Wenjing Wang and Pengyong Han and Zhengwei Li and Ru Nie and Kangwei Wang and Lei Wang and Hongmei Liao", title = "{LMGATCDA}: Graph Neural Network With Labeling Trick for Predicting {circRNA}-Disease Associations", journal = j-TCBB, volume = "21", number = "2", pages = "289--300", month = mar, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3355093", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Aug 22 12:16:12 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3355093", abstract = "Previous studies have proven that circular RNAs (circRNAs) are inextricably connected to the etiology and pathophysiology of complicated diseases. 
Since conventional biological research are frequently small-scale, expensive, and time-consuming, it is \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Li:2024:FBI, author = "Tiantian Li and Haitao Jiang and Binhai Zhu and Lusheng Wang and Daming Zhu", title = "Flanked Block-Interchange Distance on Strings", journal = j-TCBB, volume = "21", number = "2", pages = "301--311", month = mar, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3351440", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Aug 22 12:16:12 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3351440", abstract = "Rearrangement sorting problems impact profoundly in measuring genome similarities and tracing historic scenarios of species. However, recent studies on genome rearrangement mechanisms disclosed a statistically significant evidence, repeats are situated at \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zanetti:2024:CSS, author = "Jo{\~a}o Paulo Pereira Zanetti and Lucas Peres Oliveira and Jo{\~a}o Meidanis and Leonid Chindelevitch", title = "Counting Sorting Scenarios and Intermediate Genomes for the Rank Distance", journal = j-TCBB, volume = "21", number = "3", pages = "316--327", month = may, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3277733", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Sep 26 07:01:14 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3277733", abstract = "An important problem in genome comparison is the genome sorting problem, that is, the problem of finding a sequence of basic operations that transforms one genome into another whose length (possibly weighted) equals the distance between them. These \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Sheng:2024:SDL, author = "Nan Sheng and Xuping Xie and Yan Wang and Lan Huang and Shuangquan Zhang and Ling Gao and Hao Wang", title = "A Survey of Deep Learning for Detecting {miRNA}-Disease Associations: Databases, Computational Methods, Challenges, and Future Directions", journal = j-TCBB, volume = "21", number = "3", pages = "328--347", month = may, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3351752", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Sep 26 07:01:14 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3351752", abstract = "MicroRNAs (miRNAs) are an important class of non-coding RNAs that play an essential role in the occurrence and development of various diseases. Identifying the potential miRNA-disease associations (MDAs) can be beneficial in understanding disease \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Newman:2024:DDI, author = "Tara Newman and Hiu Fung Kevin Chang and Hosna Jabbari", title = "{DinoKnot}: Duplex Interaction of Nucleic Acids With {PseudoKnots}", journal = j-TCBB, volume = "21", number = "3", pages = "348--359", month = may, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3362308", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Sep 26 07:01:14 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3362308", abstract = "Interaction of nucleic acid molecules is essential for their functional roles in the cell and their applications in biotechnology. While simple duplex interactions have been studied before, the problem of efficiently predicting the minimum free energy \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wang:2024:CMS, author = "Juan Wang and Zhenchang Wang and Shasha Yuan and Chunhou Zheng and Jinxing Liu and Junliang Shang", title = "A Clustering Method for Single-Cell {RNA-Seq} Data Based on Automatic Weighting Penalty and Low-Rank Representation", journal = j-TCBB, volume = "21", number = "3", pages = "360--371", month = may, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3362472", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Sep 26 07:01:14 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3362472", abstract = "Advances in high-throughput single-cell RNA sequencing (scRNA-seq) technology have provided more comprehensive biological information on cell expression. Clustering analysis is a critical step in scRNA-seq research and provides clear knowledge of the cell \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yang:2024:SNR, author = "Xiao-Hui Yang and Ye-Tong Wang and Ming-Hui Wu and Fan Li and Cheng-Long Zhou and Li-Jun Yang and Chen Zheng and Yong Li and Zhi Li and Si-Yi Guo and Chun-Peng Song", title = "{SLPA-Net}: a Real-Time Recognition Network for Intelligent Stomata Localization and Phenotypic Analysis", journal = j-TCBB, volume = "21", number = "3", pages = "372--382", month = may, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3364208", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Sep 26 07:01:14 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3364208", abstract = "Plant stomatal phenotype traits play an important role in improving crop water use efficiency, stress resistance and yield. However, at present, the acquisition of phenotype traits mainly relies on manual measurement, which is time-consuming and \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2024:FNN, author = "Shan Zhang and Yuan Zhou and Pei Geng and Qing Lu", title = "Functional Neural Networks for High-Dimensional Genetic Data Analysis", journal = j-TCBB, volume = "21", number = "3", pages = "383--393", month = may, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3364614", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Sep 26 07:01:14 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3364614", abstract = "Artificial intelligence (AI) is a thriving research field with many successful applications in areas such as computer vision and speech recognition. Machine learning methods, such as artificial neural networks (ANN), play a central role in modern AI \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yan:2024:PPM, author = "Cheng Yan and Guihua Duan", title = "{PMDAGS}: Predicting {miRNA-Disease} Associations With Graph Nonlinear Diffusion Convolution Network and Similarities", journal = j-TCBB, volume = "21", number = "3", pages = "394--404", month = may, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3366175", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Sep 26 07:01:14 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3366175", abstract = "Many studies have proven that microRNAs (miRNAs) can participate in a wide range of biological processes and can be considered as potential noninvasive biomarkers for disease diagnosis and prognosis. 
Therefore, many computational methods have been \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{S:2024:GNC, author = "Sheena K. S. and Madhu S. Nair", title = "{GenCoder}: a Novel Convolutional Neural Network Based Autoencoder for Genomic Sequence Data Compression", journal = j-TCBB, volume = "21", number = "3", pages = "405--415", month = may, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3366240", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Sep 26 07:01:14 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3366240", abstract = "Revolutionary advances in DNA sequencing technologies fundamentally change the nature of genomics. Today's sequencing technologies have opened into an outburst in genomic data volume. These data can be used in various applications where long-term \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wang:2024:MPH, author = "Wei Wang and Zhenxi Sun and Dong Liu and Hongjun Zhang and Juntao Li and Xianfang Wang and Yun Zhou", title = "{MAHyNet}: Parallel Hybrid Network for {RNA-Protein} Binding Sites Prediction Based on Multi-Head Attention and Expectation Pooling", journal = j-TCBB, volume = "21", number = "3", pages = "416--427", month = may, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3366545", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Sep 26 07:01:14 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3366545", abstract = "RNA-binding proteins (RBPs) can regulate biological functions by interacting with specific RNAs, and play an important role in many life activities. Therefore, the rapid identification of RNA-protein binding sites is crucial for functional annotation and \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yoo:2024:IPS, author = "Sunyong Yoo and Myeonghyeon Jeong and Subhin Seomun and Kiseong Kim and Youngmahn Han", title = "Interpretable Prediction of {SARS-CoV-2} Epitope-Specific {TCR} Recognition Using a Pre-Trained Protein Language Model", journal = j-TCBB, volume = "21", number = "3", pages = "428--438", month = may, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3368046", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Sep 26 07:01:14 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3368046", abstract = "The emergence of the novel coronavirus, designated as severe acute respiratory syndrome coronavirus-2 (SARS-CoV-2), has posed a significant threat to public health worldwide. There has been progress in reducing hospitalizations and deaths due to SARS-CoV-. \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Shanthappa:2024:CPP, author = "Pallavi M. Shanthappa and Neeraj Verma and Anu George and Pawan K. 
Dhar and Prashanth Athri", title = "Computational Prediction of Potential Vaccine Candidates From {tRNA} Encoded peptides {(tREP)} Using a Bioinformatic Workflow and Molecular Dynamics Validations", journal = j-TCBB, volume = "21", number = "3", pages = "439--449", month = may, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3371984", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Sep 26 07:01:14 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3371984", abstract = "Transfer RNAs (tRNA) are non-coding RNAs. Encouraged by biological applications discovered for peptides derived from other non-coding genomic regions, we explore the possibility of deriving epitope-based vaccines from tRNA encoded peptides (tREP) in this \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Solanki:2024:ESV, author = "Arnav Solanki and James Cornette and Julia Udell and George Vasmatzis and Marc Riedel", title = "Evasive Spike Variants Elucidate the Preservation of {T} Cell Immune Response to the {SARS-CoV-2} Omicron Variant", journal = j-TCBB, volume = "21", number = "3", pages = "450--460", month = may, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3372100", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Sep 26 07:01:14 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3372100", abstract = "The Omicron variants boast the highest infectivity rates among all SARS-CoV-2 variants. Despite their lower disease severity, they can reinfect COVID-19 patients and infect vaccinated individuals as well. 
The high number of mutations in these variants \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Jia:2024:AAD, author = "Yuhang Jia and Siyu Li and Rui Jiang and Shengquan Chen", title = "Accurate Annotation for Differentiating and Imbalanced Cell Types in Single-Cell Chromatin Accessibility Data", journal = j-TCBB, volume = "21", number = "3", pages = "461--471", month = may, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3372970", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Sep 26 07:01:14 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3372970", abstract = "Rapid advances in single-cell chromatin accessibility sequencing (scCAS) technologies have enabled the characterization of epigenomic heterogeneity and increased the demand for automatic annotation of cell types. However, there are few computational \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Li:2024:TTS, author = "Minglie Li and Shusen Zhou and Tong Liu and Chanjuan Liu and Mujun Zang and Qingjun Wang", title = "{TSVM}: Transfer Support Vector Machine for Predicting {MPRA} Validated Regulatory Variants", journal = j-TCBB, volume = "21", number = "3", pages = "472--479", month = may, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3374413", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Sep 26 07:01:14 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3374413", abstract = "Genome-wide association studies have shown that common genetic variants associated with complex diseases are mostly located in non-coding regions, which may not be causal. In addition, the limited number of validated non-coding functional variants makes \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ma:2024:PPP, author = "Ke Ma and Jiawei Li and Mengyuan Zhao and Ibrahim Zamit and Bin Lin and Fei Guo and Jijun Tang", title = "{PPRTGI}: a Personalized {PageRank} Graph Neural Network for {TF}-Target Gene Interaction Detection", journal = j-TCBB, volume = "21", number = "3", pages = "480--491", month = may, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3374430", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Sep 26 07:01:14 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/pagerank.bib; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3374430", abstract = "Transcription factors (TFs) regulation is required for the vast majority of biological processes in living organisms. Some diseases may be caused by improper transcriptional regulation. Identifying the target genes of TFs is thus critical for \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Li:2024:VHI, author = "Weiling Li and Raunaq Malhotra and Steven Wu and Manjari Jha and Allen Rodrigo and Mary Poss and Raj Acharya", title = "{ViPRA-Haplo}: {{\em De Novo\/}} Reconstruction of Viral Populations Using Paired End Sequencing Data", journal = j-TCBB, volume = "21", number = "3", pages = "492--500", month = may, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3374595", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Sep 26 07:01:14 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3374595", abstract = "We present ViPRA-Haplo, a {\em de novo\/} strain-specific assembly workflow for reconstructing viral haplotypes in a viral population from paired-end next generation sequencing (NGS) data. The proposed Viral Path Reconstruction Algorithm (ViPRA) \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Cardona:2024:CON, author = "Gabriel Cardona and Joan Carles Pons and Gerard Ribas and Tom{\'a}s Mart{\'\i}nez Coronado", title = "Comparison of Orchard Networks Using Their Extended $ \mu $-Representation", journal = j-TCBB, volume = "21", number = "3", pages = "501--507", month = may, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3361390", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Sep 26 07:01:14 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3361390", abstract = "Phylogenetic networks generalize phylogenetic trees in order to model reticulation events. Although the comparison of phylogenetic trees is well studied, and there are multiple ways to do it in an efficient way, the situation is much different for \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Valdes-Jimenez:2024:PAD, author = "Alejandro Vald{\'e}s-Jim{\'e}nez and Miguel Reyes-Parada and Gabriel N{\'u}{\~n}ez-Vivanco and Daniel Jim{\'e}nez-Gonz{\'a}lez", title = "Parallel Algorithm for Discovering and Comparing Three-Dimensional Proteins Patterns", journal = j-TCBB, volume = "21", number = "3", pages = "508--515", month = may, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3367789", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Sep 26 07:01:14 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3367789", abstract = "Identifying conserved (similar) three-dimensional patterns among a set of proteins can be helpful for the rational design of polypharmacological drugs. Some available tools allow this identification from a limited perspective, only considering the \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhou:2024:EDL, author = "Xiaokang Zhou and Carson K. 
Leung and Kevin I-Kai Wang and Giancarlo Fortino", title = "Editorial Deep Learning-Empowered Big Data Analytics in Biomedical Applications and Digital Healthcare", journal = j-TCBB, volume = "21", number = "4", pages = "516--520", month = jul, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3371808", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Aug 22 12:10:22 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3371808", abstract = "Deep learning and big data analysis are among the most important research topics in the fields of biomedical applications and digital healthcare. With the fast development of artificial intelligence (AI) and Internets of Things (IoT) technologies, deep \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Cao:2024:CGC, author = "Kun Cao and Yangguang Cui and Liying Li and Junlong Zhou and Shiyan Hu", title = "{CPU-GPU} Cooperative {QoS} Optimization of Personalized Digital Healthcare Using Machine Learning and Swarm Intelligence", journal = j-TCBB, volume = "21", number = "4", pages = "521--533", month = jul, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3207509", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Aug 22 12:10:22 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3207509", abstract = "In recent decades, the rapid advances in information technology have promoted a widespread deployment of medical cyber-physical systems (MCPS), especially in the area of digital healthcare. 
In digital healthcare, medical edge devices empowered by CPU-GPU \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ganaie:2024:EDR, author = "M. A. Ganaie and M. Tanveer", title = "Ensemble Deep Random Vector Functional Link Network Using Privileged Information for {Alzheimer}'s Disease Diagnosis", journal = j-TCBB, volume = "21", number = "4", pages = "534--545", month = jul, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3170351", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Aug 22 12:10:22 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3170351", abstract = "Alzheimer's disease (AD) is a progressive brain disorder. Machine learning models have been proposed for the diagnosis of AD at early stage. Recently, deep learning architectures have received quite a lot attention. Most of the deep learning \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Malik:2024:GEE, author = "Ashwani Kumar Malik and M. 
Tanveer", title = "Graph Embedded Ensemble Deep Randomized Network for Diagnosis of {Alzheimer}'s Disease", journal = j-TCBB, volume = "21", number = "4", pages = "546--558", month = jul, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3202707", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Aug 22 12:10:22 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3202707", abstract = "Randomized shallow/deep neural networks with closed form solution avoid the shortcomings that exist in the back propagation (BP) based trained neural networks. Ensemble deep random vector functional link (edRVFL) network utilize the strength of two \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Nan:2024:MCA, author = "Fengtao Nan and Shunbao Li and Jiayu Wang and Yahui Tang and Jun Qi and Menghui Zhou and Zhong Zhao and Yun Yang and Po Yang", title = "A Multi-Classification Accessment Framework for Reproducible Evaluation of Multimodal Learning in {Alzheimer}'s Disease", journal = j-TCBB, volume = "21", number = "4", pages = "559--572", month = jul, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3204619", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Aug 22 12:10:22 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3204619", abstract = "Multimodal learning is widely used in automated early diagnosis of Alzheimer's disease. 
However, the current studies are based on an assumption that different modalities can provide more complementary information to help classify the samples from \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Razzak:2024:CME, author = "Imran Razzak and Saeeda Naz and Hamid Alinejad-Rokny and Tu N. Nguyen and Fahmi Khalifa", title = "A Cascaded Multiresolution Ensemble Deep Learning Framework for Large Scale {Alzheimer}'s Disease Detection Using Brain {MRIs}", journal = j-TCBB, volume = "21", number = "4", pages = "573--581", month = jul, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3219032", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Aug 22 12:10:22 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3219032", abstract = "Alzheimer's is progressive and irreversible type of dementia, which causes degeneration and death of cells and their connections in the brain. AD worsens over time and greatly impacts patients' life and affects their important mental \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ke:2024:DFL, author = "Hengjin Ke and Dan Chen and Quanming Yao and Yunbo Tang and Jia Wu and Jessica Monaghan and Paul Sowman and David McAlpine", title = "Deep Factor Learning for Accurate Brain Neuroimaging Data Analysis on Discrimination for Structural {MRI} and Functional {MRI}", journal = j-TCBB, volume = "21", number = "4", pages = "582--595", month = jul, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3252577", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Aug 22 12:10:22 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3252577", abstract = "Analysis of neuroimaging data (e.g., Magnetic Resonance Imaging, structural and functional MRI) plays an important role in monitoring brain dynamics and probing brain structures. Neuroimaging data are multi-featured and non-linear by nature, and it is a \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Deng:2024:MTL, author = "Lizhen Deng and Yuxin Cao and Zhongyang Wang and Xiaokang Wang and Yu Wang", title = "A Multidimensional Tensor Low Rank Method for Magnetic Resonance Image Denoising", journal = j-TCBB, volume = "21", number = "4", pages = "596--606", month = jul, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3272893", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Aug 22 12:10:22 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3272893", abstract = "In this paper, we present the Magnetic Resonance Image (MRI) denoising method via nonlocal multidimensional low rank tensor transformation constraint (NLRT). We first design a nonlocal MRI denoising method by non-local low rank tensor recovery framework. \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yu:2024:CGE, author = "Xiao Yu and Weimin Li and Jianjia Wang and Xing Wu and Bin Sheng", title = "Construction of Gene Expression Patterns to Identify Critical Genes Under {SARS-CoV-$2$} Infection Conditions", journal = j-TCBB, volume = "21", number = "4", pages = "607--618", month = jul, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3283534", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Aug 22 12:10:22 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3283534", abstract = "Severe Acute Respiratory Syndrome Coronavirus 2 (SARS-CoV-2) is a positive-stranded single-stranded RNA virus with an envelope frequently altered by unstable genetic material, making it extremely difficult for vaccines, drugs, and diagnostics to work. \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2024:SHO, author = "Yongting Zhang and Yonggang Gao and Huanhuan Wang and Huaming Wu and Youbing Xia and Xiang Wu", title = "A Secure High-Order Gene Interaction Detection Algorithm Based on Deep Neural Network", journal = j-TCBB, volume = "21", number = "4", pages = "619--630", month = jul, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3214863", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Aug 22 12:10:22 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3214863", abstract = "Identifying high-order Single Nucleotide Polymorphism (SNP) interactions of additive genetic model is crucial for detecting complex disease gene-type and predicting pathogenic genes of various disorders. We present a novel framework for high-order gene \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Djenouri:2024:SPP, author = "Youcef Djenouri and Asma Belhadi and Gautam Srivastava and Jerry Chun-Wei Lin", title = "A Secure Parallel Pattern Mining System for {Medical Internet of Things}", journal = j-TCBB, volume = "21", number = "4", pages = "631--643", month = jul, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3233803", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Aug 22 12:10:22 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3233803", abstract = "In this paper, a new generic parallel pattern mining framework called multi-objective Decomposition for Parallel Pattern-Mining (MD-PPM) is developed to solve challenges in the Internet of Medical Things through big data exploration. MD-PPM discovers \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Sarkar:2024:HSB, author = "Joy Lal Sarkar and {Ramasamy V} and Abhishek Majumder and Bibudhendu Pati and Chhabi Rani Panigrahi and Weizheng Wang and Nawab Muhammad Faseeh Qureshi and Chunhua Su and Kapal Dev", title = "{I-Health}: {SDN}-Based Fog Architecture for {IIoT} Applications in Healthcare", journal = j-TCBB, volume = "21", number = "4", pages = "644--651", month = jul, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3193918", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Aug 22 12:10:22 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3193918", abstract = "The Industrial Internet of Things (IIoT) has been introduced in an era of increasingly broad potentials in the medical industry. In recent years, IIoT-based healthcare applications have grown in popularity, with the majority of them relying on Wireless \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Bhavani:2024:SCV, author = "T. Bhavani and P. 
VamseeKrishna and Chinmay Chakraborty and Priyanka Dwivedi", title = "Stress Classification and Vital Signs Forecasting for {IoT--Health} Monitoring", journal = j-TCBB, volume = "21", number = "4", pages = "652--659", month = jul, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3196151", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Aug 22 12:10:22 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3196151", abstract = "Health monitoring embedded with intelligence is the demand of the day. In this era of a large population with the emergence of a variety of diseases, the demand for healthcare facilities is high. Yet there is scarcity of medical experts, technicians for \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Lv:2024:DLE, author = "Zhihan Lv and Jinkang Guo and Haibin Lv", title = "Deep Learning-Empowered Clinical Big Data Analytics in Healthcare Digital Twins", journal = j-TCBB, volume = "21", number = "4", pages = "660--669", month = jul, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3252668", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Aug 22 12:10:22 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3252668", abstract = "With the rapid development of information technology, great changes have taken place in the way of managing, analyzing, and using data in all walks of life. Using deep learning algorithm for data analysis in the field of medicine can improve the accuracy \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhou:2024:SDR, author = "Yichao Zhou and Zhisen Hu and Zuxing Xuan and Yangang Wang and Xiyuan Hu", title = "Synchronizing Detection and Removal of Smoke in Endoscopic Images With Cyclic Consistency Adversarial Nets", journal = j-TCBB, volume = "21", number = "4", pages = "670--680", month = jul, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3204673", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Aug 22 12:10:22 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3204673", abstract = "Smoke removal is an important and meaningful issue for endoscopic surgery, which can enhance the visual quality of endoscopic images. Because it is practically impossible to construct a large training dataset of pair-matched endoscopic images with/without \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Xu:2024:SNS, author = "Guoxia Xu and Hao Wang and Marius Pedersen and Meng Zhao and Hu Zhu", title = "{SSP-Net}: a {Siamese}-Based Structure-Preserving Generative Adversarial Network for Unpaired Medical Image Enhancement", journal = j-TCBB, volume = "21", number = "4", pages = "681--691", month = jul, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3256709", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Aug 22 12:10:22 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3256709", abstract = "Recently, unpaired medical image enhancement is one of the important topics in medical research. 
Although deep learning-based methods have achieved remarkable success in medical image enhancement, such methods face the challenge of low-quality training \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhou:2024:AUD, author = "Hanchong Zhou and Henry Leung and Bhashyam Balaji", title = "{AR-UNet}: a Deformable Image Registration Network with Cyclic Training", journal = j-TCBB, volume = "21", number = "4", pages = "692--700", month = jul, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3284215", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Aug 22 12:10:22 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3284215", abstract = "Deformable image registration is a process to determine the non-linear spatial correspondence among deformed image pairs. Generative registration network is a novel structure involving a generative registration network and a discriminative network that \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Jiang:2024:SSL, author = "Zijing Jiang and Linyan Wang and Yaqi Wang and Gangyong Jia and Guodong Zeng and Jun Wang and Yunxiang Li and Dechao Chen and Guiping Qian and Qun Jin", title = "A Self-Supervised Learning Based Framework for Eyelid Malignant Melanoma Diagnosis in Whole Slide Images", journal = j-TCBB, volume = "21", number = "4", pages = "701--714", month = jul, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3207352", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Aug 22 12:10:22 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3207352", abstract = "Eyelid malignant melanoma (MM) is a rare disease with high mortality. Accurate diagnosis of such disease is important but challenging. In clinical practice, the diagnosis of MM is currently performed manually by pathologists, which is subjective and \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wang:2024:MTI, author = "Wenyan Wang and Yongtao Li and Kun Lu and Jun Zhang and Peng Chen and Ke Yan and Bing Wang", title = "Medical Tumor Image Classification Based on Few-Shot Learning", journal = j-TCBB, volume = "21", number = "4", pages = "715--724", month = jul, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3282226", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Aug 22 12:10:22 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3282226", abstract = "As a high mortality disease, cancer seriously affects people's life and well-being. Reliance on pathologists to assess disease progression from pathological images is inaccurate and burdensome. Computer aided diagnosis (CAD) system can effectively \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Lu:2024:MML, author = "Liangfu Lu and Xudong Cui and Zhiyuan Tan and Yulei Wu", title = "{MedOptNet}: Meta-Learning Framework for Few-Shot Medical Image Classification", journal = j-TCBB, volume = "21", number = "4", pages = "725--736", month = jul, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3284846", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Aug 22 12:10:22 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3284846", abstract = "In the medical research domain, limited data and high annotation costs have made efficient classification under few-shot conditions a popular research area. 
This paper proposes a meta-learning framework, termed MedOptNet, for few-shot medical image \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Xie:2024:RCA, author = "Xia Xie and Yuanyishu Tian and Kaoru Ota and Mianxiong Dong and Zhelong Liu and Hai Jin and Dezhong Yao", title = "Reinforced Computer-Aided Framework for Diagnosing Thyroid Cancer", journal = j-TCBB, volume = "21", number = "4", pages = "737--747", month = jul, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3251323", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Aug 22 12:10:22 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3251323", abstract = "Thyroid cancer is the most pervasive disease in the endocrine system and is getting extensive attention. The most prevalent method for an early check is ultrasound examination. Traditional research mainly concentrates on promoting the performance of \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zeng:2024:PFF, author = "Lirong Zeng and Mengxing Huang and Yuchun Li and Qiong Chen and Hong-Ning Dai", title = "Progressive Feature Fusion Attention Dense Network for Speckle Noise Removal in {OCT} Images", journal = j-TCBB, volume = "21", number = "4", pages = "748--756", month = jul, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3205217", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Aug 22 12:10:22 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3205217", abstract = "Although deep learning for Big Data analytics has achieved promising results in the field of optical coherence tomography (OCT) image denoising, the low recognition rate caused by complex noise distribution and a large number of redundant features is \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Guan:2024:BDA, author = "Peiyuan Guan and Keping Yu and Wei Wei and YanLin Tan and Jia Wu", title = "Big Data Analytics on Lung Cancer Diagnosis Framework With Deep Learning", journal = j-TCBB, volume = "21", number = "4", pages = "757--768", month = jul, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3281638", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Aug 22 12:10:22 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3281638", abstract = "As the segment of diseased tissue in PET images is time-consuming, laborious and low accuracy, this work proposes an automated framework for PET image screening, denoising and diseased tissue segmentation. First, taking into account the characteristics of \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Rehman:2024:DLT, author = "Amjad Rehman and Majid Harouni and Farzaneh Zogh and Tanzila Saba and Mohsen Karimi and Faten S. Alamri and Gwanggil Jeon", title = "Detection of Lungs Tumors in {CT} Scan Images Using Convolutional Neural Networks", journal = j-TCBB, volume = "21", number = "4", pages = "769--777", month = jul, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3315303", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Aug 22 12:10:22 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3315303", abstract = "Current human being's lifestyle has caused \& exacerbated many diseases. 
One of these diseases is cancer, and among all kinds of cancers like, brain pulmonary; lung cancer is fatal. The cancers could be detected early to save lives using \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhou:2024:HHN, author = "Qingguo Zhou and Rui Zhao and Yilin Hu and Jinqiang Wang and Rui Zhou", title = "Hierarchical Hybrid Networks for Automatic Pulmonary Blood Vessel Segmentation in Computed Tomography Images", journal = j-TCBB, volume = "21", number = "4", pages = "778--788", month = jul, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3281828", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Aug 22 12:10:22 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3281828", abstract = "Pulmonary arterial hypertension (PAH) is considered the third most common cardiovascular disease after coronary heart disease and hypertension. The diagnosis of PAH is mainly based on the comprehensive judgment of computed tomography and other medical \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Lu:2024:CCM, author = "Fangfang Lu and Zhihao Zhang and Shuai Zhao and Xiantian Lin and Zhengyu Zhang and Bei Jin and Weiyan Gu and Jingjing Chen and Xiaoxin Wu", title = "{CMM}: a {CNN-MLP} Model for {COVID-19} Lesion Segmentation and Severity Grading", journal = j-TCBB, volume = "21", number = "4", pages = "789--802", month = jul, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3253901", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Aug 22 12:10:22 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3253901", abstract = "In this paper, a CNN-MLP model (CMM) is proposed for COVID-19 lesion segmentation and severity grading in CT images. The CMM starts by lung segmentation using UNet, and then segmenting the lesion from the lung region using a multi-scale deep supervised \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{He:2024:QEQ, author = "Zaobo He and Zhipeng Cai", title = "Quantifying the Effect of Quarantine Control and Optimizing Its Cost in {COVID-19} Pandemic", journal = j-TCBB, volume = "21", number = "4", pages = "803--813", month = jul, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3215559", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Aug 22 12:10:22 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3215559", abstract = "The novel coronavirus has been spreading worldwide and emerged as a public health crisis. 
As the rapid rise of infected population count, a wide variety of stringent non-pharmaceutical interventions have been taken by cities and countries around the globe. \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ahmed:2024:AIB, author = "Imran Ahmed and Abdellah Chehri and Gwanggil Jeon", title = "Artificial Intelligence and Blockchain Enabled Smart Healthcare System for Monitoring and Detection of {COVID-19} in Biomedical Images", journal = j-TCBB, volume = "21", number = "4", pages = "814--822", month = jul, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3294333", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Aug 22 12:10:22 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3294333", abstract = "Millions of individuals around the world have been impacted by the ongoing coronavirus outbreak, known as the COVID-19 pandemic. Blockchain, Artificial Intelligence (AI), and other cutting-edge digital and innovative technologies have all offered \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wu:2024:CCC, author = "Yirui Wu and Qiran Kong and Lilai Zhang and Aniello Castiglione and Michele Nappi and Shaohua Wan", title = "{CDT-CAD}: Context-Aware Deformable Transformers for End-to-End Chest Abnormality Detection on {X}-Ray Images", journal = j-TCBB, volume = "21", number = "4", pages = "823--834", month = jul, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3258455", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Aug 22 12:10:22 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3258455", abstract = "Deep learning methods have achieved great success in medical image analysis domain. However, most of them suffer from slow convergency and high computing cost, which prevents their further widely usage in practical scenarios. Moreover, it has been proved \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Li:2024:ICF, author = "Zheng Li and Xiaolong Xu and Xuefei Cao and Wentao Liu and Yiwen Zhang and Dehua Chen and Haipeng Dai", title = "Integrated {CNN} and Federated Learning for {COVID-19} Detection on Chest {X}-Ray Images", journal = j-TCBB, volume = "21", number = "4", pages = "835--845", month = jul, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2022.3184319", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Aug 22 12:10:22 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2022.3184319", abstract = "Currently, Coronavirus Disease 2019 (COVID-19) is still endangering world health and safety and deep learning (DL) is expected to be the most powerful method for efficient detection of COVID-19. However, patients' privacy concerns prohibit data \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Cai:2024:GEI, author = "Zhipeng Cai and Alexander Zelikovsky", title = "{Guest Editors}' Introduction to the Special Section on Bioinformatics Research and Applications", journal = j-TCBB, volume = "21", number = "5", pages = "1141--1142", month = sep # "\slash " # oct, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3390374", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Oct 24 08:15:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3390374", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Duan:2024:CEN, author = "Junwen Duan and Shuyue Liu and Xincheng Liao and Feng Gong and Hailin Yue and Jianxin Wang", title = "{Chinese} {EMR} Named Entity Recognition Using Fused Label Relations Based on Machine Reading Comprehension Framework", journal = j-TCBB, volume = "21", number = "5", pages = "1143--1153", month = sep # "\slash " # oct, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3376591", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Oct 24 08:15:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3376591", abstract = "Chinese electronic medical record (EMR) presents significant challenges for named entity recognition (NER) due to their specialized nature, unique language features, and diverse expressions. Traditionally, NER is treated as a sequence labeling task, where \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Li:2024:TPC, author = "Zeqian Li and Yijia Zhang and Peixuan Zhou", title = "Temporal Protein Complex Identification Based on Dynamic Heterogeneous Protein Information Network Representation Learning", journal = j-TCBB, volume = "21", number = "5", pages = "1154--1164", month = sep # "\slash " # oct, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3351078", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Oct 24 08:15:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3351078", abstract = "Protein complexes, as the fundamental units of cellular function and regulation, play a crucial role in understanding the normal physiological functions of cells. Existing methods for protein complex identification attempt to introduce other biological \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yan:2024:GES, author = "Da Yan and Catia Pesquita and Carsten G{\"o}rg and Jake Y. Chen", title = "Guest Editorial Selected Papers From {BIOKDD 2022}", journal = j-TCBB, volume = "21", number = "5", pages = "1165--1167", month = sep # "\slash " # oct, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3429784", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Oct 24 08:15:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3429784", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Tanvir:2024:DPH, author = "Farhan Tanvir and Khaled Mohammed Saifuddin and Muhammad Ifte Khairul Islam and Esra Akbas", title = "{DDI} Prediction With Heterogeneous Information Network --- Meta-Path Based Approach", journal = j-TCBB, volume = "21", number = "5", pages = "1168--1179", month = sep # "\slash " # oct, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3417715", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Oct 24 08:15:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3417715", abstract = "Drug-drug interaction (DDI) indicates where a particular drug's desired course of action is modified when taken with other drug (s). DDIs may hamper, enhance, or reduce the expected effect of either drug or, in the worst possible scenario, cause an \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Raza:2024:ICD, author = "Shaina Raza and Chen Ding", title = "Improving Clinical Decision Making With a Two-Stage Recommender System", journal = j-TCBB, volume = "21", number = "5", pages = "1180--1190", month = sep # "\slash " # oct, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3318209", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Oct 24 08:15:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3318209", abstract = "Clinical decision-making is complex and time-intensive. 
To help in this effort, clinical recommender systems (RS) have been designed to facilitate healthcare practitioners with personalized advice. However, designing an effective clinical RS poses \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ji:2024:SSL, author = "Cunmei Ji and Ning Yu and Yutian Wang and Jiancheng Ni and Chunhou Zheng", title = "{SGLMDA}: a Subgraph Learning-Based Method for {miRNA}-Disease Association Prediction", journal = j-TCBB, volume = "21", number = "5", pages = "1191--1201", month = sep # "\slash " # oct, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3373772", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Oct 24 08:15:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3373772", abstract = "MicroRNAs (miRNA) are endogenous non-coding RNAs, typically around 23 nucleotides in length. Many miRNAs have been founded to play crucial roles in gene regulation though post-transcriptional repression in animals. Existing studies suggest that the \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Duan:2024:BAD, author = "Junwen Duan and Huai Guo and Han Jiang and Fei Guo and Jianxin Wang", title = "Boundary-Aware Dual Biaffine Model for Sequential Sentence Classification in Biomedical Documents", journal = j-TCBB, volume = "21", number = "5", pages = "1202--1210", month = sep # "\slash " # oct, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3376566", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Oct 24 08:15:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3376566", abstract = "Assigning appropriate rhetorical roles, such as ``background,'' ``intervention,'' and ``outcome,'' to sentences in biomedical documents can streamline the process for physicians to locate evidence and resources for \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Hsiao:2024:NMC, author = "Yen-Che Hsiao and Abhishek Dutta", title = "Network Modeling and Control of Dynamic Disease Pathways, Review and Perspectives", journal = j-TCBB, volume = "21", number = "5", pages = "1211--1230", month = sep # "\slash " # oct, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3378155", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Oct 24 08:15:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3378155", abstract = "Dynamic disease pathways are a combination of complex dynamical processes among bio-molecules in a cell that leads to diseases.
Network modeling of disease pathways considers disease-related bio-molecules (e.g. DNA, RNA, transcription factors, enzymes, \ldots{})", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wang:2024:TTD, author = "Qingsong Wang and Ruiquan Ge and Changmiao Wang and Ahmed Elazab and Qiming Fang and Renfeng Zhang", title = "{TDFFM}: Transformer and Deep Forest Fusion Model for Predicting Coronavirus {3C}-Like Protease Cleavage Sites", journal = j-TCBB, volume = "21", number = "5", pages = "1231--1241", month = sep # "\slash " # oct, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3378470", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Oct 24 08:15:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3378470", abstract = "COVID-19, caused by the highly contagious SARS-CoV-2 virus, is distinguished by its positive-sense, single-stranded RNA genome. A thorough understanding of SARS-CoV-2 pathogenesis is crucial for halting its proliferation. Notably, the 3C-like protease of \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Palacio:2024:DPC, author = "Ana Le{\'o}n Palacio and Alberto Garc{\'\i}a S. 
and Jos{\'e} Fabi{\'a}n Reyes Rom{\'a}n and Mireia Costa and Oscar Pastor", title = "The {Delfos Platform}: a Conceptual Model-Based Solution for the Enhancement of Precision Medicine", journal = j-TCBB, volume = "21", number = "5", pages = "1242--1253", month = sep # "\slash " # oct, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3377928", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Oct 24 08:15:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3377928", abstract = "The use in the clinical practice of the vast amount of genomic data generated by current sequencing technologies constitutes a bottleneck for the progress of Precision Medicine (PM). Various problems inherent to the genomics domain (i.e., dispersion, \ldots{})", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Cai:2024:FPP, author = "Changfeng Cai and Jianghui Li and Yuanling Xia and Weihua Li", title = "{FluPMT}: Prediction of Predominant Strains of Influenza {A} Viruses via Multi-Task Learning", journal = j-TCBB, volume = "21", number = "5", pages = "1254--1263", month = sep # "\slash " # oct, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3378468", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Oct 24 08:15:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3378468", abstract = "Seasonal influenza vaccines play a crucial role in saving numerous lives annually. However, the constant evolution of the influenza A virus necessitates frequent vaccine updates to ensure its ongoing effectiveness. 
The decision to develop a new vaccine \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Biswas:2024:FFG, author = "Sumona Biswas and Shovan Barma", title = "Feature Fusion {GAN} Based Virtual Staining on Plant Microscopy Images", journal = j-TCBB, volume = "21", number = "5", pages = "1264--1273", month = sep # "\slash " # oct, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3380634", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Oct 24 08:15:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3380634", abstract = "Virtual staining of microscopy specimens using GAN-based methods could resolve critical concerns of manual staining process as displayed in recent studies on histopathology images. However, most of these works use basic-GAN framework ignoring microscopy \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yang:2024:NMT, author = "Wenya Yang and Sai Zou and Hongfeng Gao and Lei Wang and Wei Ni", title = "A Novel Method for Targeted Identification of Essential Proteins by Integrating Chemical Reaction Optimization and Naive {Bayes} Model", journal = j-TCBB, volume = "21", number = "5", pages = "1274--1286", month = sep # "\slash " # oct, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3382392", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Oct 24 08:15:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3382392", abstract = "Targeted identification of essential proteins is of great significance for species identification, drug manufacturing, and disease treatment. It is a challenge to analyze the binding mechanism between essential proteins and improve the identification \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yang:2024:EKO, author = "Sen Yang and Peng Cheng and Yang Liu and Dawei Feng and Shengqi Wang", title = "Exploring the Knowledge of an Outstanding Protein to Protein Interaction Transformer", journal = j-TCBB, volume = "21", number = "5", pages = "1287--1298", month = sep # "\slash " # oct, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3381825", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Oct 24 08:15:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3381825", abstract = "Protein-to-protein interaction (PPI) prediction aims to predict whether two given proteins interact or not. Compared with traditional experimental methods of high cost and low efficiency, the current deep learning based approach makes it possible to \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Chong:2024:DST, author = "Xiaoya Chong and Howard Leung and Qing Li and Jianhua Yao and Niyun Zhou", title = "Deep Spatio-Temporal Network for Low-{SNR} Cryo-{EM} Movie Frame Enhancement", journal = j-TCBB, volume = "21", number = "5", pages = "1299--1310", month = sep # "\slash " # oct, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3380410", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Oct 24 08:15:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3380410", abstract = "Cryo-EM in single particle analysis is known to have low SNR and requires to utilize several frames of the same particle sample to restore one high-quality image for visualizing that particle. However, the low SNR of cryo-EM movie and motion caused by \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Guo:2024:IDE, author = "Yin Guo and Yanni Xiao and Limin Li", title = "Identifying Differentially Expressed Genes in {RNA} Sequencing Data With Small Labelled Samples", journal = j-TCBB, volume = "21", number = "5", pages = "1311--1321", month = sep # "\slash " # oct, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3382147", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Oct 24 08:15:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3382147", abstract = "RNA-seq, including bulk RNA-seq and single-cell RNA-seq, is a next-generation sequencing-based RNA profiling method capable of measuring gene expression patterns with high resolution, and has gradually become an essential tool for the analysis of \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Hosseini:2024:MDR, author = "Seyed Hamid Hosseini and Mahdi Imani", title = "Modeling Defensive Response of Cells to Therapies: Equilibrium Interventions for Regulatory Networks", journal = j-TCBB, volume = "21", number = "5", pages = "1322--1334", month = sep # "\slash " # oct, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3383814", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Oct 24 08:15:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3383814", abstract = "A major objective in genomics is to design interventions that can shift undesirable behaviors of such systems (i.e., those associated with cancers) into desirable ones. Several intervention policies have been developed in recent years, including dynamic \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wang:2024:NEM, author = "Huan Wang and Ziwen Cui and Yinguang Yang and Baijing Wang and Lida Zhu and Wen Zhang", title = "A Network Enhancement Method to Identify Spurious Drug-Drug Interactions", journal = j-TCBB, volume = "21", number = "5", pages = "1335--1347", month = sep # "\slash " # oct, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3385796", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Oct 24 08:15:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3385796", abstract = "As medical safety and drug regulation gain heightened attention, the detection of spurious drug-drug interactions (DDI) has become key in healthcare. Although current research using graph neural networks (GNNs) to predict DDI has shown impressive results, \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Liang:2024:MPM, author = "Ying Liang and Xiya You and Zequn Zhang and Shi Qiu and Suhui Li and Lianlian Fu", title = "{MGFmiRNAloc}: Predicting {miRNA} Subcellular Localization Using Molecular Graph Feature and Convolutional Block Attention Module", journal = j-TCBB, volume = "21", number = "5", pages = "1348--1357", month = sep # "\slash " # oct, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3383438", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Oct 24 08:15:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3383438", abstract = "MiRNA has distinct physiological functions at various cellular locations. However, few effective computational methods for predicting the subcellular location of miRNA exist, thereby leaving considerable room for improvement. Accordingly, our study \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ge:2024:HNH, author = "Ruiquan Ge and Yixiao Xia and Minchao Jiang and Gangyong Jia and Xiaoyang Jing and Ye Li and Yunpeng Cai", title = "{HybAVPnet}: a Novel Hybrid Network Architecture for Antiviral Peptides Prediction", journal = j-TCBB, volume = "21", number = "5", pages = "1358--1365", month = sep # "\slash " # oct, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3385635", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Oct 24 08:15:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3385635", abstract = "Viruses pose a great threat to human production and life, thus the research and development of antiviral drugs is urgently needed. Antiviral peptides play an important role in drug design and development. Compared with the time-consuming and laborious wet \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wang:2024:EEN, author = "Xubin Wang and Yunhe Wang and Zhiqiang Ma and Ka-Chun Wong and Xiangtao Li", title = "Exhaustive Exploitation of Nature-Inspired Computation for Cancer Screening in an Ensemble Manner", journal = j-TCBB, volume = "21", number = "5", pages = "1366--1379", month = sep # "\slash " # oct, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3385402", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Oct 24 08:15:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3385402", abstract = "Accurate screening of cancer types is crucial for effective cancer detection and precise treatment selection. However, the association between gene expression profiles and tumors is often limited to a small number of biomarker genes. While computational \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Hu:2024:END, author = "Xiaowen Hu and Ying Jiang and Lei Deng", title = "Exploring {ncRNA}-Drug Sensitivity Associations via Graph Contrastive Learning", journal = j-TCBB, volume = "21", number = "5", pages = "1380--1389", month = sep # "\slash " # oct, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3385423", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Oct 24 08:15:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3385423", abstract = "Increasing evidence has shown that noncoding RNAs (ncRNAs) can affect drug efficiency by modulating drug sensitivity genes. 
Exploring the association between ncRNAs and drug sensitivity is essential for drug discovery and disease prevention. However, \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Mondal:2024:MTL, author = "Sankar Mondal and Pradipta Maji", title = "Multi-Task Learning and Sparse Discriminant Canonical Correlation Analysis for Identification of Diagnosis-Specific Genotype--Phenotype Association", journal = j-TCBB, volume = "21", number = "5", pages = "1390--1402", month = sep # "\slash " # oct, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3386406", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Oct 24 08:15:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3386406", abstract = "The primary objective of imaging genetics research is to investigate the complex genotype-phenotype association for the disease under study. For example, to understand the impact of genetic variations over the brain functions and structure, the genotypic \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Liu:2024:TAI, author = "Dian Liu and Zi Liu and Yunpeng Xia and Zhikang Wang and Jiangning Song and Dong-Jun Yu", title = "{TransC-ac4C}: Identification of {N4-Acetylcytidine} {(ac4C)} Sites in {mRNA} Using Deep Learning", journal = j-TCBB, volume = "21", number = "5", pages = "1403--1412", month = sep # "\slash " # oct, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3386972", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Oct 24 08:15:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3386972", abstract = "N4-acetylcytidine (ac4C) is a post-transcriptional modification in mRNA that is critical in mRNA translation in terms of stability and regulation. In the past few years, numerous approaches employing convolutional neural networks (CNN) and Transformer \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Lan:2024:LPC, author = "Wei Lan and Chunling Li and Qingfeng Chen and Ning Yu and Yi Pan and Yu Zheng and Yi-Ping Phoebe Chen", title = "{LGCDA}: Predicting {CircRNA-Disease} Association Based on Fusion of Local and Global Features", journal = j-TCBB, volume = "21", number = "5", pages = "1413--1422", month = sep # "\slash " # oct, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3387913", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Oct 24 08:15:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3387913", abstract = "CircRNA has been shown to be involved in the occurrence of many diseases. Several computational frameworks have been proposed to identify circRNA-disease associations. Despite the existing computational methods have obtained considerable successes, these \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Chen:2024:PKN, author = "Mingshuai Chen and Quan Zou and Ren Qi and Yijie Ding", title = "{PseU-KeMRF}: a Novel Method for Identifying {RNA} Pseudouridine Sites", journal = j-TCBB, volume = "21", number = "5", pages = "1423--1435", month = sep # "\slash " # oct, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3389094", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Oct 24 08:15:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3389094", abstract = "Pseudouridine is a type of abundant RNA modification that is seen in many different animals and is crucial for a variety of biological functions. Accurately identifying pseudouridine sites within the RNA sequence is vital for the subsequent study of \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Alkhanbouli:2024:ACA, author = "Razan Alkhanbouli and Amira Al-Aamri and Maher Maalouf and Kamal Taha and Andreas Henschel and Dirar Homouz", title = "Analysis of Cancer-Associated Mutations of {POLB} Using Machine Learning and Bioinformatics", journal = j-TCBB, volume = "21", number = "5", pages = "1436--1444", month = sep # "\slash " # oct, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3395777", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Oct 24 08:15:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3395777", abstract = "DNA damage is a critical factor in the onset and progression of cancer. 
When DNA is damaged, the number of genetic mutations increases, making it necessary to activate DNA repair mechanisms. A crucial factor in the base excision repair process, which \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Lu:2024:RPC, author = "Pengli Lu and Yuehao Wang", title = "{RDGAN}: Prediction of {circRNA}-Disease Associations via Resistance Distance and Graph Attention Network", journal = j-TCBB, volume = "21", number = "5", pages = "1445--1457", month = sep # "\slash " # oct, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3402248", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Oct 24 08:15:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3402248", abstract = "As a series of single-stranded RNAs, circRNAs have been implicated in numerous diseases and can serve as valuable biomarkers for disease therapy and prevention. However, traditional biological experiments demand significant time and effort. Therefore, \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhao:2024:DTB, author = "Lingling Zhao and Yan Zhu and Naifeng Wen and Chunyu Wang and Junjie Wang and Yongfeng Yuan", title = "Drug-Target Binding Affinity Prediction in a Continuous Latent Space Using Variational Autoencoders", journal = j-TCBB, volume = "21", number = "5", pages = "1458--1467", month = sep # "\slash " # oct, year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3402661", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Oct 24 08:15:46 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3402661", abstract = "Accurate prediction of Drug-Target binding Affinity (DTA) is a daunting yet pivotal task in the sphere of drug discovery. Over the years, a plethora of deep learning-based DTA models have emerged, rendering promising results in predicting the binding \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Lam:2024:GEG, author = "Su Datt Lam and Wai Keat Yam and Yi-Ping Phoebe Chen", title = "Guest Editorial for the {20th Asia Pacific Bioinformatics Conference}", journal = j-TCBB, volume = "21", number = "6", pages = "1601--1603", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3475108", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jan 18 06:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3475108", abstract = "The four papers in this special section were presented at the 20th Asia Pacific Bioinformatics Conference (APBC), which was held in Malaysia 26--28 April 2022.", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhu:2024:TEE, author = "Lin Zhu and Xin Kang and Chunhe Li and Jie Zheng", title = "{TMELand}: an End-to-End Pipeline for Quantification and Visualization of {Waddington}'s Epigenetic Landscape Based on Gene Regulatory Network", journal = j-TCBB, volume = "21", number = "6", pages = "1604--1612", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3285395", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jan 18 06:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3285395", abstract = "Waddington's epigenetic landscape is a framework depicting the processes of cell differentiation and reprogramming under the control of a gene regulatory network (GRN).
Traditional model-driven methods for landscape quantification focus on the \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yu:2024:MTD, author = "Chenglin Yu and Shu Zhang and Muheng Shang and Lei Guo and Junwei Han and Lei Du", title = "A Multi-Task Deep Feature Selection Method for Brain Imaging Genetics", journal = j-TCBB, volume = "21", number = "6", pages = "1613--1622", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3294413", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jan 18 06:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3294413", abstract = "Using brain imaging quantitative traits (QTs) for identifying genetic risk factors is an important research topic in brain imaging genetics. Many efforts have been made for this task via building linear models between imaging QTs and genetic factors such \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ido:2024:MIP, author = "Ryota Ido and Shengjuan Cao and Jianshen Zhu and Naveed Ahmed Azam and Kazuya Haraguchi and Liang Zhao and Hiroshi Nagamochi and Tatsuya Akutsu", title = "A Method for Inferring Polymers Based on Linear Regression and Integer Programming", journal = j-TCBB, volume = "21", number = "6", pages = "1623--1632", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3447780", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jan 18 06:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3447780", abstract = "A novel framework has recently been proposed for designing the molecular structure of chemical compounds with a desired chemical property using both artificial neural networks and mixed integer linear programming. In this paper, we design a new method for \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Sweeney:2024:FFS, author = "Michael D. Sweeney and Luke A. Torre-Healy and Virginia L. Ma and Margaret A. Hall and Lucie Chrastecka and Alisa Yurovsky and Richard A. 
Moffitt", title = "{FaStaNMF}: a Fast and Stable Non-Negative Matrix Factorization for Gene Expression", journal = j-TCBB, volume = "21", number = "6", pages = "1633--1644", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2023.3296979", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jan 18 06:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2023.3296979", abstract = "Gene expression analysis of samples with mixed cell types only provides limited insight to the characteristics of specific tissues. {\em In silico\/} deconvolution can be applied to extract cell type specific expression, thus avoiding \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Xu:2024:HCF, author = "Wenwei Xu and Hao Zhang and Yewei Xia and Yixin Ren and Jihong Guan and Shuigeng Zhou", title = "Hybrid Causal Feature Selection for Cancer Biomarker Identification From {RNA-Seq} Data", journal = j-TCBB, volume = "21", number = "6", pages = "1645--1655", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3406922", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jan 18 06:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3406922", abstract = "The discovery of cancer biomarkers helps to advance medical diagnosis and plays an important role in biomedical applications. Most of the existing data-driven methods identify biomarkers by ranking-based strategies, which generally return a subset or \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol.
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Soylemez:2024:NAP, author = "{\"U}mm{\"u} G{\"u}ls{\"u}m S{\"o}ylemez and Malik Yousef and Z{\"u}lal Kesmen and Burcu Bakir-Gungor", title = "Novel Antimicrobial Peptide Design Using Motif Match Score Representation", journal = j-TCBB, volume = "21", number = "6", pages = "1656--1666", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3413021", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jan 18 06:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3413021", abstract = "Antimicrobial peptides (AMPs) have drawn the interest of the researchers since they offer an alternative to the traditional antibiotics in the fight against antibiotic resistance and they exhibit additional pharmaceutically significant properties. \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Claussen:2024:ECE, author = "Henry Claussen and Santu Ghosh and Jie Chen", title = "Exploring Combined Effects of {DNA} Methylation and Copy Number on Gene Expression With a Two-Stage Approach", journal = j-TCBB, volume = "21", number = "6", pages = "1667--1675", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3406969", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jan 18 06:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3406969", abstract = "DNA methylation and copy number may be associated with each other to some extent, in positive or negative ways. 
Whether differential methylation and copy number variation have combined effects on gene expression is largely unknown. We use a multivariate \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhong:2024:RRG, author = "Jian Zhong and Haochen Zhao and Qichang Zhao and Ruikang Zhou and Lishen Zhang and Fei Guo and Jianxin Wang", title = "{RGCNPPIS}: a Residual Graph Convolutional Network for Protein-Protein Interaction Site Prediction", journal = j-TCBB, volume = "21", number = "6", pages = "1676--1684", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3410350", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jan 18 06:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3410350", abstract = "Accurate identification of protein-protein interaction (PPI) sites is crucial for understanding the mechanisms of biological processes, developing PPI networks, and detecting protein functions. Currently, most computational methods primarily concentrate \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhao:2024:IIG, author = "Wenting Zhao and Gongping Xu and Long Wang and Zhen Cui and Tong Zhang and Jian Yang", title = "Intra-Inter Graph Representation Learning for Protein-Protein Binding Sites Prediction", journal = j-TCBB, volume = "21", number = "6", pages = "1685--1696", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3416341", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jan 18 06:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3416341", abstract = "Graph neural networks have drawn increasing attention and achieved remarkable progress recently due to their potential applications for a large amount of irregular data. It is a natural way to represent protein as a graph. In this work, we focus on \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Dai:2024:DSB, author = "Yuanfei Dai and Bin Zhang and Shiping Wang", title = "Distantly Supervised Biomedical Relation Extraction via Negative Learning and Noisy Student Self-Training", journal = j-TCBB, volume = "21", number = "6", pages = "1697--1708", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3412174", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jan 18 06:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3412174", abstract = "Biomedical relation extraction aims to identify underlying relationships among entities, such as gene associations and drug interactions, within biomedical texts. 
Despite advancements in relation extraction in general knowledge domains, the scarcity of \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Kumar:2024:CNL, author = "Vikash Kumar and Akshay Deepak and Ashish Ranjan and Aravind Prakash", title = "{CrossPredGO}: a Novel Light-Weight Cross-Modal Multi-Attention Framework for Protein Function Prediction", journal = j-TCBB, volume = "21", number = "6", pages = "1709--1720", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3410696", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jan 18 06:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3410696", abstract = "Proteins are represented in various ways, each contributing differently to protein-related tasks. Here, information from each representation (protein sequence, 3D structure, and interaction data) is combined for an efficient protein function prediction \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Masood:2024:TFB, author = "M. Mohamed Divan Masood and D. 
Manjula and Vijayan Sugumaran", title = "Transcription Factor Binding Site Prediction Using {CnNet} Approach", journal = j-TCBB, volume = "21", number = "6", pages = "1721--1730", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3411024", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jan 18 06:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3411024", abstract = "Controlling the gene expression is the most important development in a living organism, which makes it easier to find different kinds of diseases and their causes. It's very difficult to know what factors control the gene expression. Transcription \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2024:PIR, author = "Fujin Zhang and Zhangwei Li and Kailong Zhao and Pengxin Zhao and Guijun Zhang", title = "Prediction of Inter-Residue Multiple Distances and Exploration of Protein Multiple Conformations by Deep Learning", journal = j-TCBB, volume = "21", number = "6", pages = "1731--1739", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3411825", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jan 18 06:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3411825", abstract = "AlphaFold2 has achieved a major breakthrough in end-to-end prediction for static protein structures. However, protein conformational change is considered to be a key factor in protein biological function. Inter-residue multiple distances prediction is of \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol.
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Qin:2024:PTP, author = "Xinyi Qin and Lu Zhang and Min Liu and Guangzhong Liu", title = "{PRFold-TNN}: Protein Fold Recognition With an Ensemble Feature Selection Method Using {PageRank} Algorithm Based on Transformer", journal = j-TCBB, volume = "21", number = "6", pages = "1740--1751", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3414497", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jan 18 06:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/pagerank.bib; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3414497", abstract = "Understanding the tertiary structures of proteins is of great benefit to function in many aspects of human life. Protein fold recognition is a vital and salient means to know protein structure. Until now, researchers have successively proposed a variety \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhang:2024:SGB, author = "Chengcheng Zhang and Wei Li and Ming Deng and Yizhang Jiang and Xiaohui Cui and Ping Chen", title = "{SIG}: Graph-Based Cancer Subtype Stratification With Gene Mutation Structural Information", journal = j-TCBB, volume = "21", number = "6", pages = "1752--1764", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3414498", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jan 18 06:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3414498", abstract = "Somatic tumors have a high-dimensional, sparse, and small sample size nature, making cancer subtype stratification based on somatic genomic data a challenge. Current methods for improving cancer clustering performance focus on dimension reduction, \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Sun:2024:SUG, author = "Weicheng Sun and Chengjuan Ren and Jinsheng Xu and Ping Zhang", title = "{SAGCN}: Using Graph Convolutional Network With Subgraph-Aware for {circRNA}--Drug Sensitivity Identification", journal = j-TCBB, volume = "21", number = "6", pages = "1765--1774", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3415058", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jan 18 06:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3415058", abstract = "Circular RNAs (circRNAs) play a significant role in cancer development and therapy resistance. 
There is substantial evidence indicating that the expression of circRNAs affects the sensitivity of cells to drugs. Identifying circRNAs-drug sensitivity \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Kim:2024:RSC, author = "Jongrae Kim and Woojeong Lee and Kwang-Hyun Cho", title = "Recursive Self-Composite Approach Toward Structural Understanding of {Boolean} Networks", journal = j-TCBB, volume = "21", number = "6", pages = "1775--1783", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3415352", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jan 18 06:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3415352", abstract = "Boolean networks have been widely used in systems biology to study the dynamical characteristics of biological networks such as steady-states or cycles, yet there has been little attention to the dynamic properties of network structures. Here, we \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Usman:2024:ALS, author = "Khalid Usman and Fangping Wan and Dan Zhao and Jian Peng and Jianyang Zeng", title = "Analyzing Large-Scale Single-Cell {RNA-Seq} Data Using {Coreset}", journal = j-TCBB, volume = "21", number = "6", pages = "1784--1793", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3418078", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jan 18 06:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3418078", abstract = "The recent boom in single-cell sequencing technologies provides valuable insights into the transcriptomes of individual cells. Through single-cell data analyses, a number of biological discoveries, such as novel cell types, developmental cell lineage \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Ju:2024:SGF, author = "Zhen Ju and Jingjing Zhang and Xuelei Li and Jintao Meng and Yanjie Wei", title = "{SeedHit}: a {GPU} Friendly Pre-Align Filtering Algorithm", journal = j-TCBB, volume = "21", number = "6", pages = "1794--1802", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3417517", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jan 18 06:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3417517", abstract = "The amount of genetic data generated by Next Generation Sequencing (NGS) technologies grows faster than Moore's law. This necessitates the development of efficient NGS data processing and analysis algorithms.
A filter before the computationally- \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Xia:2024:BML, author = "Yunpeng Xia and Ying Zhang and Dian Liu and Yi-Heng Zhu and Zhikang Wang and Jiangning Song and Dong-Jun Yu", title = "{BLAM6A-Merge}: Leveraging Attention Mechanisms and Feature Fusion Strategies to Improve the Identification of {RNA} {N6}-Methyladenosine Sites", journal = j-TCBB, volume = "21", number = "6", pages = "1803--1815", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3418490", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jan 18 06:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3418490", abstract = "RNA N6-methyladenosine is a prevalent and abundant type of RNA modification that exerts significant influence on diverse biological processes. To date, numerous computational approaches have been developed for predicting methylation, with most of them \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol.
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Emadi:2024:IFC, author = "Marzieh Emadi and Farsad Zamani Boroujeni and Jamshid Pirgazi", title = "Improved Fuzzy Cognitive Maps for Gene Regulatory Networks Inference Based on Time Series Data", journal = j-TCBB, volume = "21", number = "6", pages = "1816--1829", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3423383", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jan 18 06:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3423383", abstract = "Microarray data provide lots of information regarding gene expression levels. Due to the large amount of such data, their analysis requires sufficient computational methods for identifying and analyzing gene regulation networks; however, researchers in \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Wang:2024:GCN, author = "Guangyu Wang and Ying Chu and Qianqian Wang and Limei Zhang and Lishan Qiao and Mingxia Liu", title = "Graph Convolutional Network With Self-Supervised Learning for Brain Disease Classification", journal = j-TCBB, volume = "21", number = "6", pages = "1830--1841", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3422152", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jan 18 06:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3422152", abstract = "Brain functional network (BFN) analysis has become a popular method for identifying neurological diseases at their early stages and revealing sensitive biomarkers related to these diseases. Due to the fact that BFN is a graph with complex structure, graph \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Fan:2024:EII, author = "Xueqiang Fan and Bing Lin and Jun Hu and Zhongyi Guo", title = "{Ense-i6mA}: Identification of {DNA} {N$^6$}-Methyladenine Sites Using {XGB-RFE} Feature Selection and Ensemble Machine Learning", journal = j-TCBB, volume = "21", number = "6", pages = "1842--1854", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3421228", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jan 18 06:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3421228", abstract = "DNA {N$^6$}-methyladenine (6mA) is an important epigenetic modification that plays a vital role in various cellular processes. Accurate identification of the 6mA sites is fundamental to elucidate the biological functions and mechanisms of \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol.
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Yoshida:2024:TDE, author = "Ruriko Yoshida and David Barnhill and Keiji Miura and Daniel Howe", title = "Tropical Density Estimation of Phylogenetic Trees", journal = j-TCBB, volume = "21", number = "6", pages = "1855--1863", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3420815", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jan 18 06:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3420815", abstract = "Much evidence from biological theory and empirical data indicates that, gene trees, phylogenetic trees reconstructed from different genes (loci), do not have to have exactly the same tree topologies. Such incongruence between gene trees might be caused by \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Foo:2024:HFI, author = "Yong See Foo and Jennifer Flegg", title = "Haplotype Frequency Inference From Pooled Genetic Data With a Latent Multinomial Model", journal = j-TCBB, volume = "21", number = "6", pages = "1864--1873", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3420430", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jan 18 06:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3420430", abstract = "In genetic association studies, haplotype data provide more refined information than data about separate genetic markers. However, large-scale studies that genotype hundreds to thousands of individuals may only provide results of pooled data. 
Methods for \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Feng:2024:PPM, author = "Hailin Feng and Chenchen Ke and Quan Zou and Zhechen Zhu and Tongcun Liu", title = "Prediction of Potential {miRNA}--Disease Associations Based on a Masked Graph Autoencoder", journal = j-TCBB, volume = "21", number = "6", pages = "1874--1885", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3421924", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jan 18 06:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3421924", abstract = "Biomedical evidence has demonstrated the relevance of microRNA (miRNA) dysregulation in complex human diseases, and determining the relationship between miRNAs and diseases can aid in the early detection and prevention of diseases. Traditional biological \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. 
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhu:2024:SSC, author = "Wentao Zhu and Zhiqiang Du and Ziang Xu and Defu Yang and Minghan Chen and Qianqian Song", title = "{SCRN}: Single-Cell Gene Regulatory Network Identification in {Alzheimer}'s Disease", journal = j-TCBB, volume = "21", number = "6", pages = "1886--1896", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3424400", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jan 18 06:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3424400", abstract = "Alzheimer's disease (AD) is the most common neurodegenerative disease, and it consumes considerable medical resources with increasing number of patients every year. Mounting evidence show that the regulatory disruptions altering the intrinsic \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol.
Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Fu:2024:HAP, author = "Xiuhao Fu and Hao Duan and Xiaofeng Zang and Chunling Liu and Xingfeng Li and Qingchen Zhang and Zilong Zhang and Quan Zou and Feifei Cui", title = "{Hyb\_SEnc}: an Antituberculosis Peptide Predictor Based on a Hybrid Feature Vector and Stacked Ensemble Learning", journal = j-TCBB, volume = "21", number = "6", pages = "1897--1910", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3425644", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jan 18 06:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3425644", abstract = "Tuberculosis has plagued mankind since ancient times, and the struggle between humans and tuberculosis continues. Mycobacterium tuberculosis is the leading cause of tuberculosis, infecting nearly one-third of the world's population. The rise of \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Shehadeh:2024:MLA, author = "Fadi Shehadeh and LewisOscar Felix and Markos Kalligeros and Adnan Shehadeh and Beth Burgwyn Fuchs and Frederick M. Ausubel and Paul P.
Sotiriadis and Eleftherios Mylonakis", title = "Machine Learning-Assisted High-Throughput Screening for Anti-{MRSA} Compounds", journal = j-TCBB, volume = "21", number = "6", pages = "1911--1921", year = "2024", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2024.3434340", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sat Jan 18 06:37:18 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2024.3434340", abstract = "Background: Antimicrobial resistance is a major public health threat, and new agents are needed. Computational approaches have been proposed to reduce the cost and time needed for compound screening. Aims: A machine learning (ML) model was developed for \ldots{}", acknowledgement = ack-nhfb, ajournal = "IEEE/ACM Trans. Comput. Biol. Bioinform.", fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", }