%%% -*-BibTeX-*- %%% ==================================================================== %%% BibTeX-file{ %%% author = "Nelson H. F. Beebe", %%% version = "1.71", %%% date = "05 March 2026", %%% time = "11:39:08 MDT", %%% filename = "tkdd.bib", %%% address = "University of Utah %%% Department of Mathematics, 110 LCB %%% 155 S 1400 E RM 233 %%% Salt Lake City, UT 84112-0090 %%% USA", %%% telephone = "+1 801 581 5254", %%% URL = "https://www.math.utah.edu/~beebe", %%% checksum = "12178 50456 254909 2402376", %%% email = "beebe at math.utah.edu, beebe at acm.org, %%% beebe at computer.org (Internet)", %%% codetable = "ISO/ASCII", %%% keywords = "ACM Transactions on Knowledge Discovery from %%% Data (TKDD); bibliography; TKDD", %%% license = "public domain", %%% supported = "yes", %%% docstring = "This is a COMPLETE BibTeX bibliography for %%% ACM Transactions on Knowledge Discovery from %%% Data (TKDD) (CODEN ????, ISSN 1556-4681), %%% covering all journal issues from 2007 -- %%% date. %%% %%% At version 1.71, the COMPLETE journal %%% coverage looked like this: %%% %%% 2007 ( 14) 2014 ( 37) 2021 ( 112) %%% 2008 ( 18) 2015 ( 41) 2022 ( 126) %%% 2009 ( 25) 2016 ( 54) 2023 ( 137) %%% 2010 ( 26) 2017 ( 27) 2024 ( 234) %%% 2011 ( 11) 2018 ( 73) 2025 ( 176) %%% 2012 ( 26) 2019 ( 65) 2026 ( 35) %%% 2013 ( 20) 2020 ( 78) %%% %%% Article: 1335 %%% %%% Total entries: 1335 %%% %%% The journal Web page can be found at: %%% %%% http://www.acm.org/pubs/tkdd.html %%% %%% The journal table of contents page is at: %%% %%% http://www.acm.org/tkdd/ %%% http://portal.acm.org/browse_dl.cfm?idx=J1054 %%% %%% Qualified subscribers can retrieve the full %%% text of recent articles in PDF form. %%% %%% The initial draft was extracted from the ACM %%% Web pages. %%% %%% ACM copyrights explicitly permit abstracting %%% with credit, so article abstracts, keywords, %%% and subject classifications have been %%% included in this bibliography wherever %%% available. 
Article reviews have been %%% omitted, until their copyright status has %%% been clarified. %%% %%% bibsource keys in the bibliography entries %%% below indicate the entry originally came %%% from the computer science bibliography %%% archive, even though it has likely since %%% been corrected and updated. %%% %%% URL keys in the bibliography point to %%% World Wide Web locations of additional %%% information about the entry. %%% %%% BibTeX citation tags are uniformly chosen %%% as name:year:abbrev, where name is the %%% family name of the first author or editor, %%% year is a 4-digit number, and abbrev is a %%% 3-letter condensation of important title %%% words. Citation tags were automatically %%% generated by software developed for the %%% BibNet Project. %%% %%% In this bibliography, entries are sorted in %%% publication order, using ``bibsort -byvolume.'' %%% %%% The checksum field above contains a CRC-16 %%% checksum as the first value, followed by the %%% equivalent of the standard UNIX wc (word %%% count) utility output of lines, words, and %%% characters. This is produced by Robert %%% Solovay's checksum utility." %%% } %%% ==================================================================== @Preamble{"\input bibnames.sty" # "\def \TM {${}^{\sc TM}$}" # "\ifx \undefined \bioname \def \bioname#1{{{\em #1\/}}} \fi" } %%% ==================================================================== %%% Acknowledgement abbreviations: @String{ack-nhfb = "Nelson H. F. 
Beebe, University of Utah, Department of Mathematics, 110 LCB, 155 S 1400 E RM 233, Salt Lake City, UT 84112-0090, USA, Tel: +1 801 581 5254, e-mail: \path|beebe@math.utah.edu|, \path|beebe@acm.org|, \path|beebe@computer.org| (Internet), URL: \path|https://www.math.utah.edu/~beebe/|"} %%% ==================================================================== %%% Journal abbreviations: @String{j-TKDD = "ACM Transactions on Knowledge Discovery from Data (TKDD)"} %%% ==================================================================== %%% Bibliography entries: @Article{Han:2007:I, author = "Jiawei Han", title = "Introduction", journal = j-TKDD, volume = "1", number = "1", pages = "1:1--1:??", month = mar, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1217299.1217300", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 17:58:36 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "1", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Leskovec:2007:GED, author = "Jure Leskovec and Jon Kleinberg and Christos Faloutsos", title = "Graph evolution: {Densification} and shrinking diameters", journal = j-TKDD, volume = "1", number = "1", pages = "2:1--2:??", month = mar, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1217299.1217301", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 17:58:36 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "How do real graphs evolve over time? What are normal growth patterns in social, technological, and information networks? 
Many studies have discovered patterns in {\em static graphs}, identifying properties in a single snapshot of a large network or in a very small number of snapshots; these include heavy tails for in- and out-degree distributions, communities, small-world phenomena, and others. However, given the lack of information about network evolution over long periods, it has been hard to convert these findings into statements about trends over time.\par Here we study a wide range of real graphs, and we observe some surprising phenomena. First, most of these graphs densify over time with the number of edges growing superlinearly in the number of nodes. Second, the average distance between nodes often shrinks over time in contrast to the conventional wisdom that such distance parameters should increase slowly as a function of the number of nodes (like $ O(\log n) $ or $ O(\log (\log n))$).\par Existing graph generation models do not exhibit these types of behavior even at a qualitative level. We provide a new graph generator, based on a forest fire spreading process that has a simple, intuitive justification, requires very few parameters (like the flammability of nodes), and produces graphs exhibiting the full range of properties observed both in prior work and in the present study.\par We also notice that the forest fire model exhibits a sharp transition between sparse graphs and graphs that are densifying. Graphs with decreasing distance between the nodes are generated around this transition point.\par Last, we analyze the connection between the temporal evolution of the degree distribution and densification of a graph. We find that the two are fundamentally related. We also observe that real networks exhibit this type of relation between densification and the degree distribution.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "2", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "Densification power laws; graph generators; graph mining; heavy-tailed distributions; small-world phenomena", } @Article{Machanavajjhala:2007:DPB, author = "Ashwin Machanavajjhala and Daniel Kifer and Johannes Gehrke and Muthuramakrishnan Venkitasubramaniam", title = "{$L$}-diversity: {Privacy} beyond $k$-anonymity", journal = j-TKDD, volume = "1", number = "1", pages = "3:1--3:??", month = mar, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1217299.1217302", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 17:58:36 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Publishing data about individuals without revealing sensitive information about them is an important problem. In recent years, a new definition of privacy called $k$-anonymity has gained popularity. In a $k$-anonymized dataset, each record is indistinguishable from at least $ k - 1$ other records with respect to certain identifying attributes.\par In this article, we show using two simple attacks that a $k$-anonymized dataset has some subtle but severe privacy problems. First, an attacker can discover the values of sensitive attributes when there is little diversity in those sensitive attributes. This is a known problem. Second, attackers often have background knowledge, and we show that $k$-anonymity does not guarantee privacy against attackers using background knowledge. We give a detailed analysis of these two attacks, and we propose a novel and powerful privacy criterion called $ \ell $-diversity that can defend against such attacks. 
In addition to building a formal foundation for $ \ell $-diversity, we show in an experimental evaluation that $ \ell $-diversity is practical and can be implemented efficiently.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "3", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "$ \ell $-diversity; Data privacy; $k$-anonymity; privacy-preserving data publishing", } @Article{Gionis:2007:CA, author = "Aristides Gionis and Heikki Mannila and Panayiotis Tsaparas", title = "Clustering aggregation", journal = j-TKDD, volume = "1", number = "1", pages = "4:1--4:??", month = mar, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1217299.1217303", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 17:58:36 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "We consider the following problem: given a set of clusterings, find a single clustering that agrees as much as possible with the input clusterings. This problem, {\em clustering aggregation}, appears naturally in various contexts. For example, clustering categorical data is an instance of the clustering aggregation problem; each categorical attribute can be viewed as a clustering of the input rows where rows are grouped together if they take the same value on that attribute. Clustering aggregation can also be used as a metaclustering method to improve the robustness of clustering by combining the output of multiple algorithms. Furthermore, the problem formulation does not require a priori information about the number of clusters; it is naturally determined by the optimization function.\par In this article, we give a formal statement of the clustering aggregation problem, and we propose a number of algorithms. 
Our algorithms make use of the connection between clustering aggregation and the problem of {\em correlation clustering}. Although the problems we consider are NP-hard, for several of our methods, we provide theoretical guarantees on the quality of the solutions. Our work provides the best deterministic approximation algorithm for the variation of the correlation clustering problem we consider. We also show how sampling can be used to scale the algorithms for large datasets. We give an extensive empirical evaluation demonstrating the usefulness of the problem and of the solutions.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "4", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "clustering aggregation; clustering categorical data; correlation clustering; Data clustering", } @Article{Bhattacharya:2007:CER, author = "Indrajit Bhattacharya and Lise Getoor", title = "Collective entity resolution in relational data", journal = j-TKDD, volume = "1", number = "1", pages = "5:1--5:??", month = mar, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1217299.1217304", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 17:58:36 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Many databases contain uncertain and imprecise references to real-world entities. The absence of identifiers for the underlying entities often results in a database which contains multiple references to the same entity. This can lead not only to data redundancy, but also inaccuracies in query processing and knowledge extraction. These problems can be alleviated through the use of {\em entity resolution}. Entity resolution involves discovering the underlying entities and mapping each database reference to these entities. 
Traditionally, entities are resolved using pairwise similarity over the attributes of references. However, there is often additional relational information in the data. Specifically, references to different entities may cooccur. In these cases, collective entity resolution, in which entities for cooccurring references are determined jointly rather than independently, can improve entity resolution accuracy. We propose a novel relational clustering algorithm that uses both attribute and relational information for determining the underlying domain entities, and we give an efficient implementation. We investigate the impact that different relational similarity measures have on entity resolution quality. We evaluate our collective entity resolution algorithm on multiple real-world databases. We show that it improves entity resolution performance over both attribute-based baselines and over algorithms that consider relational information but do not resolve entities collectively. In addition, we perform detailed experiments on synthetically generated data to identify data characteristics that favor collective relational resolution over purely attribute-based algorithms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "5", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "data cleaning; Entity resolution; graph clustering; record linkage", } @Article{Loh:2007:EEL, author = "Wei-Yin Loh and Chien-Wei Chen and Wei Zheng", title = "Extrapolation errors in linear model trees", journal = j-TKDD, volume = "1", number = "2", pages = "6:1--6:??", month = aug, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1267066.1267067", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 17:58:48 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Prediction errors from a linear model tend to be larger when extrapolation is involved, particularly when the model is wrong. This article considers the problem of extrapolation and interpolation errors when a linear model tree is used for prediction. It proposes several ways to curtail the size of the errors, and uses a large collection of real datasets to demonstrate that the solutions are effective in reducing the average mean squared prediction error. The article also provides a proof that, if a linear model is correct, the proposed solutions have no undesirable effects as the training sample size tends to infinity.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "6", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "Decision tree; prediction; regression; statistics", } @Article{Zhang:2007:MPP, author = "Minghua Zhang and Ben Kao and David W. Cheung and Kevin Y. 
Yip", title = "Mining periodic patterns with gap requirement from sequences", journal = j-TKDD, volume = "1", number = "2", pages = "7:1--7:??", month = aug, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1267066.1267068", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 17:58:48 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "We study a problem of mining frequently occurring periodic patterns with a gap requirement from sequences. Given a character sequence $S$ of length $L$ and a pattern $P$ of length $l$, we consider $P$ a frequently occurring pattern in $S$ if the probability of {\em observing\/} $P$ given a randomly picked length-$l$ subsequence of $S$ exceeds a certain threshold. In many applications, particularly those related to bioinformatics, interesting patterns are {\em periodic\/} with a {\em gap requirement}. That is to say, the characters in $P$ should match subsequences of $S$ in such a way that the matching characters in $S$ are separated by gaps of more or less the same size. We show the complexity of the mining problem and discuss why traditional mining algorithms are computationally infeasible. We propose practical algorithms for solving the problem and study their characteristics. We also present a case study in which we apply our algorithms on some DNA sequences. We discuss some interesting patterns obtained from the case study.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "7", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "gap requirement; periodic pattern; Sequence mining", } @Article{Huang:2007:TTE, author = "Jen-Wei Huang and Bi-Ru Dai and Ming-Syan Chen", title = "{Twain}: {Two-end} association miner with precise frequent exhibition periods", journal = j-TKDD, volume = "1", number = "2", pages = "8:1--8:??", month = aug, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1267066.1267069", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 17:58:48 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "We investigate the general model of mining associations in a temporal database, where the exhibition periods of items are allowed to be different from one to another. The database is divided into partitions according to the time granularity imposed. Such temporal association rules allow us to observe short-term but interesting patterns that are absent when the whole range of the database is evaluated altogether. Prior work may omit some temporal association rules and thus have limited practicability. To remedy this and to give more precise frequent exhibition periods of frequent temporal itemsets, we devise an efficient algorithm {\em Twain\/} (standing for {\em TWo end AssocIation miNer\/}). {\em Twain\/} not only generates frequent patterns with more precise frequent exhibition periods, but also discovers more interesting frequent patterns. {\em Twain\/} employs Start time and End time of each item to provide precise frequent exhibition period while progressively handling itemsets from one partition to another. Along with one scan of the database, {\em Twain\/} can generate frequent 2-itemsets directly according to the cumulative filtering threshold. 
Then, {\em Twain\/} adopts the scan reduction technique to generate all frequent $k$-itemsets ($ k > 2 $) from the generated frequent 2-itemsets. Theoretical properties of {\em Twain\/} are derived as well in this article. The experimental results show that {\em Twain\/} outperforms the prior works in the quality of frequent patterns, execution time, I/O cost, CPU overhead and scalability.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "8", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "Association; temporal", } @Article{Bayardop:2007:ISI, author = "Roberto Bayardo and Kristin P. Bennett and Gautam Das and Dimitrios Gunopulos and Johannes Gehrke", title = "Introduction to special issue {ACM SIGKDD 2006}", journal = j-TKDD, volume = "1", number = "3", pages = "9:1--9:??", month = dec, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1297332.1297333", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 17:58:56 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "9", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Bohm:2007:RPF, author = "Christian B{\"o}hm and Christos Faloutsos and Jia-Yu Pan and Claudia Plant", title = "{RIC}: {Parameter-free} noise-robust clustering", journal = j-TKDD, volume = "1", number = "3", pages = "10:1--10:??", month = dec, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1297332.1297334", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 17:58:56 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "How do we find a {\em natural\/} clustering of a real-world point set which contains an unknown number of clusters with different shapes, and which may be contaminated by noise? As most clustering algorithms were designed with certain assumptions (Gaussianity), they often require the user to give input parameters, and are sensitive to noise. In this article, we propose a robust framework for determining a natural clustering of a given dataset, based on the minimum description length (MDL) principle. The proposed framework, {\em robust information-theoretic clustering (RIC)}, is orthogonal to any known clustering algorithm: Given a preliminary clustering, RIC purifies these clusters from noise, and adjusts the clusterings such that it simultaneously determines the most natural amount and shape (subspace) of the clusters. Our RIC method can be combined with any clustering technique ranging from K-means and K-medoids to advanced methods such as spectral clustering. In fact, RIC is even able to purify and improve an initial coarse clustering, even if we start with very simple methods. In an extension, we propose a fully automatic stand-alone clustering method and efficiency improvements. RIC scales well with the dataset size. 
Extensive experiments on synthetic and real-world datasets validate the proposed RIC framework.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "10", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "Clustering; data summarization; noise robustness; parameter-free data mining", } @Article{Mei:2007:SAF, author = "Qiaozhu Mei and Dong Xin and Hong Cheng and Jiawei Han and Chengxiang Zhai", title = "Semantic annotation of frequent patterns", journal = j-TKDD, volume = "1", number = "3", pages = "11:1--11:??", month = dec, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1297332.1297335", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 17:58:56 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Using frequent patterns to analyze data has been one of the fundamental approaches in many data mining applications. Research in frequent pattern mining has so far mostly focused on developing efficient algorithms to discover various kinds of frequent patterns, but little attention has been paid to the important next step --- interpreting the discovered frequent patterns. Although the compression and summarization of frequent patterns has been studied in some recent work, the proposed techniques there can only annotate a frequent pattern with nonsemantical information (e.g., support), which provides only limited help for a user to understand the patterns.\par In this article, we study the novel problem of generating semantic annotations for frequent patterns. The goal is to discover the hidden meanings of a frequent pattern by annotating it with in-depth, concise, and structured information. 
We propose a general approach to generate such an annotation for a frequent pattern by constructing its context model, selecting informative context indicators, and extracting representative transactions and semantically similar patterns. This general approach can well incorporate the user's prior knowledge, and has potentially many applications, such as generating a dictionary-like description for a pattern, finding synonym patterns, discovering semantic relations, and summarizing semantic classes of a set of frequent patterns. Experiments on different datasets show that our approach is effective in generating semantic pattern annotations.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "11", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "Frequent pattern; pattern annotation; pattern context; pattern semantic analysis", } @Article{Koren:2007:MEP, author = "Yehuda Koren and Stephen C. North and Chris Volinsky", title = "Measuring and extracting proximity graphs in networks", journal = j-TKDD, volume = "1", number = "3", pages = "12:1--12:??", month = dec, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1297332.1297336", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 17:58:56 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Measuring distance or some other form of proximity between objects is a standard data mining tool. Connection subgraphs were recently proposed as a way to demonstrate proximity between nodes in networks. We propose a new way of measuring and extracting proximity in networks called ``cycle-free effective conductance'' (CFEC). Importantly, the measured proximity is accompanied with a {\em proximity subgraph\/} which allows assessing and understanding measured values. 
Our proximity calculation can handle more than two endpoints, directed edges, is statistically well behaved, and produces an effectiveness score for the computed subgraphs. We provide an efficient algorithm to measure and extract proximity. Also, we report experimental results and show examples for four large network datasets: a telecommunications calling graph, the IMDB actors graph, an academic coauthorship network, and a movie recommendation system.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "12", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "Connection subgraph; cycle-free escape probability; escape probability; graph mining; proximity; proximity subgraph; random walk", } @Article{Ihler:2007:LDE, author = "Alexander Ihler and Jon Hutchins and Padhraic Smyth", title = "Learning to detect events with {Markov}-modulated {Poisson} processes", journal = j-TKDD, volume = "1", number = "3", pages = "13:1--13:??", month = dec, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1297332.1297337", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 17:58:56 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Time-series of count data occur in many different contexts, including Internet navigation logs, freeway traffic monitoring, and security logs associated with buildings. In this article we describe a framework for detecting anomalous events in such data using an unsupervised learning approach. Normal periodic behavior is modeled via a time-varying Poisson process model, which in turn is modulated by a hidden Markov process that accounts for bursty events. We outline a Bayesian framework for learning the parameters of this model from count time-series. 
Two large real-world datasets of time-series counts are used as testbeds to validate the approach, consisting of freeway traffic data and logs of people entering and exiting a building. We show that the proposed model is significantly more accurate at detecting known events than a more traditional threshold-based technique. We also describe how the model can be used to investigate different degrees of periodicity in the data, including systematic day-of-week and time-of-day effects, and to make inferences about different aspects of events such as number of vehicles or people involved. The results indicate that the Markov-modulated Poisson framework provides a robust and accurate framework for adaptively and autonomously learning how to separate unusual bursty events from traces of normal human activity.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "13", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "Event detection; Markov modulated; Poisson", } @Article{Gionis:2007:ADM, author = "Aristides Gionis and Heikki Mannila and Taneli Mielik{\"a}inen and Panayiotis Tsaparas", title = "Assessing data mining results via swap randomization", journal = j-TKDD, volume = "1", number = "3", pages = "14:1--14:??", month = dec, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1297332.1297338", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 17:58:56 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "The problem of assessing the significance of data mining results on high-dimensional 0--1 datasets has been studied extensively in the literature. For problems such as mining frequent sets and finding correlations, significance testing can be done by standard statistical tests such as chi-square, or other methods. 
However, the results of such tests depend only on the specific attributes and not on the dataset as a whole. Moreover, the tests are difficult to apply to sets of patterns or other complex results of data mining algorithms. In this article, we consider a simple randomization technique that deals with this shortcoming. The approach consists of producing random datasets that have the same row and column margins as the given dataset, computing the results of interest on the randomized instances and comparing them to the results on the actual data. This randomization technique can be used to assess the results of many different types of data mining algorithms, such as frequent sets, clustering, and spectral analysis. To generate random datasets with given margins, we use variations of a Markov chain approach which is based on a simple swap operation. We give theoretical results on the efficiency of different randomization methods, and apply the swap randomization method to several well-known datasets. Our results indicate that for some datasets the structure discovered by the data mining algorithms is expected, given the row and column margins of the datasets, while for other datasets the discovered structure conveys information that is not captured by the margin counts.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "14", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "0--1 data; randomization tests; Significance testing; swaps", } @Article{Tang:2008:TTA, author = "Lei Tang and Huan Liu and Jianping Zhang and Nitin Agarwal and John J. 
Salerno", title = "Topic taxonomy adaptation for group profiling", journal = j-TKDD, volume = "1", number = "4", pages = "1:1--1:??", month = jan, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1324172.1324173", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 17:59:07 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "A topic taxonomy is an effective representation that describes salient features of virtual groups or online communities. A topic taxonomy consists of topic nodes. Each internal node is defined by its vertical path (i.e., ancestor and child nodes) and its horizontal list of attributes (or terms). In a text-dominant environment, a topic taxonomy can be used to flexibly describe a group's interests with varying granularity. However, the stagnant nature of a taxonomy may fail to timely capture the dynamic change of a group's interest. This article addresses the problem of how to adapt a topic taxonomy to the accumulated data that reflects the change of a group's interest to achieve dynamic group profiling. We first discuss the issues related to topic taxonomy. We next formulate taxonomy adaptation as an optimization problem to find the taxonomy that best fits the data. We then present a viable algorithm that can efficiently accomplish taxonomy adaptation. We conduct extensive experiments to evaluate our approach's efficacy for group profiling, compare the approach with some alternatives, and study its performance for dynamic group profiling. While pointing out various applications of taxonomy adaption, we suggest some future work that can take advantage of burgeoning Web 2.0 services for online targeted marketing, counterterrorism in connecting dots, and community tracking.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "1", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "dynamic profiling; group interest; taxonomy adjustment; text hierarchical classification; Topic taxonomy", } @Article{Cormode:2008:FHH, author = "Graham Cormode and Flip Korn and S. Muthukrishnan and Divesh Srivastava", title = "Finding hierarchical heavy hitters in streaming data", journal = j-TKDD, volume = "1", number = "4", pages = "2:1--2:??", month = jan, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1324172.1324174", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 17:59:07 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Data items that arrive online as streams typically have attributes which take values from one or more hierarchies (time and geographic location, source and destination IP addresses, etc.). Providing an aggregate view of such data is important for summarization, visualization, and analysis. We develop an aggregate view based on certain organized sets of large-valued regions (``heavy hitters'') corresponding to hierarchically discounted frequency counts. We formally define the notion of {\em hierarchical heavy hitters\/} (HHHs). We first consider computing (approximate) HHHs over a data stream drawn from a single hierarchical attribute. We formalize the problem and give deterministic algorithms to find them in a single pass over the input.\par In order to analyze a wider range of realistic data streams (e.g., from IP traffic-monitoring applications), we generalize this problem to multiple dimensions. Here, the semantics of HHHs are more complex, since a ``child'' node can have multiple ``parent'' nodes. We present online algorithms that find approximate HHHs in one pass, with provable accuracy guarantees. 
The product of hierarchical dimensions forms a mathematical lattice structure. Our algorithms exploit this structure, and so are able to track approximate HHHs using only a small, fixed number of statistics per stored item, regardless of the number of dimensions.\par We show experimentally, using real data, that our proposed algorithms yield outputs which are very similar (virtually identical, in many cases) to offline computations of the exact solutions, whereas straightforward heavy-hitters-based approaches give significantly inferior answer quality. Furthermore, the proposed algorithms result in an order of magnitude savings in data structure size while performing competitively.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "2", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "approximation algorithms; Data mining; network data analysis", } @Article{Somaiya:2008:LCU, author = "Manas Somaiya and Christopher Jermaine and Sanjay Ranka", title = "Learning correlations using the mixture-of-subsets model", journal = j-TKDD, volume = "1", number = "4", pages = "3:1--3:??", month = jan, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1324172.1324175", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 17:59:07 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Using a mixture of random variables to model data is a tried-and-tested method common in data mining, machine learning, and statistics. By using mixture modeling it is often possible to accurately model even complex, multimodal data via very simple components. However, the classical mixture model assumes that a data point is generated by a single component in the model. A lot of datasets can be modeled closer to the underlying reality if we drop this restriction.
We propose a probabilistic framework, the {\em mixture-of-subsets (MOS) model}, by making two fundamental changes to the classical mixture model. First, we allow a data point to be generated by a set of components, rather than just a single component. Next, we limit the number of data attributes that each component can influence. We also propose an EM framework to learn the MOS model from a dataset, and experimentally evaluate it on real, high-dimensional datasets. Our results show that the MOS model learned from the data represents the underlying nature of the data accurately.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "3", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "EM algorithm; high-dimensional data; Mixture modeling", } @Article{Halkidi:2008:CFB, author = "M. Halkidi and D. Gunopulos and M. Vazirgiannis and N. Kumar and C. Domeniconi", title = "A clustering framework based on subjective and objective validity criteria", journal = j-TKDD, volume = "1", number = "4", pages = "4:1--4:??", month = jan, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1324172.1324176", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 17:59:07 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Clustering, as an unsupervised learning process is a challenging problem, especially in cases of high-dimensional datasets. Clustering result quality can benefit from user constraints and objective validity assessment. In this article, we propose a semisupervised framework for learning the weighted Euclidean subspace, where the best clustering can be achieved. Our approach capitalizes on: (i) user constraints; and (ii) the quality of intermediate clustering results in terms of their structural properties. 
The proposed framework uses the clustering algorithm and the validity measure as its parameters. We develop and discuss algorithms for learning and tuning the weights of contributing dimensions and defining the ``best'' clustering obtained by satisfying user constraints. Experimental results on benchmark datasets demonstrate the superiority of the proposed approach in terms of improved clustering accuracy.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "4", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "cluster validity; data mining; Semisupervised learning; similarity measure learning; space learning", } @Article{Zaki:2008:ISI, author = "Mohammed J. Zaki and George Karypis and Jiong Yang and Wei Wang", title = "Introduction to special issue on bioinformatics", journal = j-TKDD, volume = "2", number = "1", pages = "1:1--1:??", month = mar, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1342320.1342321", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 17:59:18 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "1", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Jin:2008:CMM, author = "Ying Jin and T. M. 
Murali and Naren Ramakrishnan", title = "Compositional mining of multirelational biological datasets", journal = j-TKDD, volume = "2", number = "1", pages = "2:1--2:??", month = mar, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1342320.1342322", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 17:59:18 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "High-throughput biological screens are yielding ever-growing streams of information about multiple aspects of cellular activity. As more and more categories of datasets come online, there is a corresponding multitude of ways in which inferences can be chained across them, motivating the need for compositional data mining algorithms. In this article, we argue that such compositional data mining can be effectively realized by functionally cascading redescription mining and biclustering algorithms as primitives. Both these primitives mirror shifts of vocabulary that can be composed in arbitrary ways to create rich chains of inferences. Given a relational database and its schema, we show how the schema can be automatically compiled into a compositional data mining program, and how different domains in the schema can be related through logical sequences of biclustering and redescription invocations. This feature allows us to rapidly prototype new data mining applications, yielding greater understanding of scientific datasets. We describe two applications of compositional data mining: (i) matching terms across categories of the Gene Ontology and (ii) understanding the molecular mechanisms underlying stress response in human cells.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "2", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "Biclustering; bioinformatics; compositional data mining; inductive logic programming; redescription mining", } @Article{Sahay:2008:DSB, author = "Saurav Sahay and Sougata Mukherjea and Eugene Agichtein and Ernest V. Garcia and Shamkant B. Navathe and Ashwin Ram", title = "Discovering semantic biomedical relations utilizing the {Web}", journal = j-TKDD, volume = "2", number = "1", pages = "3:1--3:??", month = mar, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1342320.1342323", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 17:59:18 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "To realize the vision of a Semantic Web for Life Sciences, discovering relations between resources is essential. It is very difficult to automatically extract relations from Web pages expressed in natural language formats. On the other hand, because of the explosive growth of information, it is difficult to manually extract the relations. In this paper we present techniques to automatically discover relations between biomedical resources from the Web. For this purpose we retrieve relevant information from Web Search engines and Pubmed database using various lexico-syntactic patterns as queries over SOAP web services. The patterns are initially handcrafted but can be progressively learnt. The extracted relations can be used to construct and augment ontologies and knowledge bases. Experiments are presented for general biomedical relation discovery and domain specific search to show the usefulness of our technique.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "3", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "Ontology construction; relation identification", } @Article{Ye:2008:DSA, author = "Jieping Ye and Jianhui Chen and Ravi Janardan and Sudhir Kumar", title = "Developmental stage annotation of \bioname{Drosophila} gene expression pattern images via an entire solution path for {LDA}", journal = j-TKDD, volume = "2", number = "1", pages = "4:1--4:??", month = mar, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1342320.1342324", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 17:59:18 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/string-matching.bib; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Gene expression in a developing embryo occurs in particular cells (spatial patterns) in a time-specific manner (temporal patterns), which leads to the differentiation of cell fates. Images of a \bioname{Drosophila melanogaster} embryo at a given developmental stage, showing a particular gene expression pattern revealed by a gene-specific probe, can be compared for spatial overlaps. The comparison is fundamentally important to formulating and testing gene interaction hypotheses. Expression pattern comparison is most biologically meaningful when images from a similar time point (developmental stage) are compared. In this paper, we present LdaPath, a novel formulation of Linear Discriminant Analysis (LDA) for automatic developmental stage range classification. It employs multivariate linear regression with the {$ L_1 $}-norm penalty controlled by a regularization parameter for feature extraction and visualization. LdaPath computes an entire solution path for all values of regularization parameter with essentially the same computational cost as fitting one LDA model. Thus, it facilitates efficient model selection. 
It is based on the equivalence relationship between LDA and the least squares method for multiclass classifications. This equivalence relationship is established under a mild condition, which we show empirically to hold for many high-dimensional datasets, such as expression pattern images. Our experiments on a collection of 2705 expression pattern images show the effectiveness of the proposed algorithm. Results also show that the LDA model resulting from LdaPath is sparse, and irrelevant features may be removed. Thus, LdaPath provides a general framework for simultaneous feature selection and feature extraction.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "4", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "dimensionality reduction; Gene expression pattern image; linear discriminant analysis; linear regression", } @Article{Lu:2008:ADA, author = "Yijuan Lu and Qi Tian and Jennifer Neary and Feng Liu and Yufeng Wang", title = "Adaptive discriminant analysis for microarray-based classification", journal = j-TKDD, volume = "2", number = "1", pages = "5:1--5:??", month = mar, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1342320.1342325", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 17:59:18 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Microarray technology has generated enormous amounts of high-dimensional gene expression data, providing a unique platform for exploring gene regulatory networks. However, the curse of dimensionality plagues effort to analyze these high throughput data. Linear Discriminant Analysis (LDA) and Biased Discriminant Analysis (BDA) are two popular techniques for dimension reduction, which pay attention to different roles of the positive and negative samples in finding discriminating subspace. 
However, the drawbacks of these two methods are obvious: LDA has limited efficiency in classifying sample data from subclasses with different distributions, and BDA does not account for the underlying distribution of negative samples.\par In this paper, we propose a novel dimension reduction technique for microarray analysis: Adaptive Discriminant Analysis (ADA), which effectively exploits favorable attributes of both BDA and LDA and avoids their unfavorable ones. ADA can find a good discriminative subspace with adaptation to different sample distributions. It not only alleviates the problem of high dimensionality, but also enhances the classification performance in the subspace with na{\"\i}ve Bayes classifier. To learn the best model fitting the real scenario, boosted Adaptive Discriminant Analysis is further proposed. Extensive experiments on the yeast cell cycle regulation data set, and the expression data of the red blood cell cycle in malaria parasite {\em Plasmodium falciparum\/} demonstrate the superior performance of ADA and boosted ADA. We also present some putative genes of specific functional classes predicted by boosted ADA. Their potential functionality is confirmed by independent predictions based on Gene Ontology, demonstrating that ADA and boosted ADA are effective dimension reduction methods for microarray-based classification.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "5", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "ADA; BDA; boosted ADA; dimension reduction; LDA; microarray", } @Article{Hashimoto:2008:NEP, author = "Kosuke Hashimoto and Kiyoko Flora Aoki-Kinoshita and Nobuhisa Ueda and Minoru Kanehisa and Hiroshi Mamitsuka", title = "A new efficient probabilistic model for mining labeled ordered trees applied to glycobiology", journal = j-TKDD, volume = "2", number = "1", pages = "6:1--6:??", month = mar, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1342320.1342326", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 17:59:18 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Mining frequent patterns from large datasets is an important issue in data mining. Recently, complex and unstructured (or semi-structured) datasets have appeared as targets for major data mining applications, including text mining, web mining and bioinformatics. Our work focuses on labeled ordered trees, which are typically semi-structured datasets. In bioinformatics, carbohydrate sugar chains, or glycans, can be modeled as labeled ordered trees. Glycans are the third major class of biomolecules, having important roles in signaling and recognition. For mining labeled ordered trees, we propose a new probabilistic model and its efficient learning scheme which significantly improves the time and space complexity of an existing probabilistic model for labeled ordered trees. We evaluated the performance of the proposed model, comparing it with those of other probabilistic models, using synthetic as well as real datasets from glycobiology. Experimental results showed that the proposed model drastically reduced the computation time of the competing model, keeping the predictive power and avoiding overfitting to the training data. 
Finally, we assessed our results on real data from a variety of biological viewpoints, verifying known facts in glycobiology.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "6", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "Expectation-maximization; labeled ordered trees; maximum likelihood; probabilistic models", } @Article{Ge:2008:JCA, author = "Rong Ge and Martin Ester and Byron J. Gao and Zengjian Hu and Binay Bhattacharya and Boaz Ben-Moshe", title = "Joint cluster analysis of attribute data and relationship data: {The} connected $k$-center problem, algorithms and applications", journal = j-TKDD, volume = "2", number = "2", pages = "7:1--7:??", month = jul, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1376815.1376816", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 17:59:30 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Attribute data and relationship data are two principal types of data, representing the intrinsic and extrinsic properties of entities. While attribute data have been the main source of data for cluster analysis, relationship data such as social networks or metabolic networks are becoming increasingly available. It is also common to observe both data types carry complementary information such as in market segmentation and community identification, which calls for a joint cluster analysis of both data types so as to achieve better results. In this article, we introduce the novel Connected $k$-Center ({\em CkC\/}) problem, a clustering model taking into account attribute data as well as relationship data. We analyze the complexity of the problem and prove its NP-hardness. Therefore, we analyze the approximability of the problem and also present a constant factor approximation algorithm. 
For the special case of the {\em CkC\/} problem where the relationship data form a tree structure, we propose a dynamic programming method giving an optimal solution in polynomial time. We further present NetScan, a heuristic algorithm that is efficient and effective for large real databases. Our extensive experimental evaluation on real datasets demonstrates the meaningfulness and accuracy of the NetScan results.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "7", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "approximation algorithms; Attribute data; community identification; document clustering; joint cluster analysis; market segmentation; NP-hardness; relationship data", } @Article{Gupta:2008:BBC, author = "Gunjan Gupta and Joydeep Ghosh", title = "{Bregman} bubble clustering: a robust framework for mining dense clusters", journal = j-TKDD, volume = "2", number = "2", pages = "8:1--8:??", month = jul, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1376815.1376817", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 17:59:30 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "In classical clustering, each data point is assigned to at least one cluster. However, in many applications only a small subset of the available data is relevant for the problem and the rest needs to be ignored in order to obtain good clusters. Certain nonparametric density-based clustering methods find the most relevant data as multiple dense regions, but such methods are generally limited to low-dimensional data and do not scale well to large, high-dimensional datasets. Also, they use a specific notion of ``distance'', typically Euclidean or Mahalanobis distance, which further limits their applicability. 
On the other hand, the recent One Class Information Bottleneck (OC-IB) method is fast and works on a large class of distortion measures known as Bregman Divergences, but can only find a {\em single\/} dense region. This article presents a broad framework for finding $k$ dense clusters while ignoring the rest of the data. It includes a seeding algorithm that can automatically determine a suitable value for {\em k}. When $k$ is forced to 1, our method gives rise to an improved version of OC-IB with optimality guarantees. We provide a generative model that yields the proposed iterative algorithm for finding $k$ dense regions as a special case. Our analysis reveals an interesting and novel connection between the problem of finding dense regions and exponential mixture models; a hard model corresponding to $k$ exponential mixtures with a uniform background results in a set of $k$ dense clusters. The proposed method describes a highly scalable algorithm for finding multiple dense regions that works with any Bregman Divergence, thus extending density based clustering to a variety of non-Euclidean problems not addressable by earlier methods. We present empirical results on three artificial, two microarray and one text dataset to show the relevance and effectiveness of our methods.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "8", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "Bregman divergences; Density-based clustering; expectation maximization; exponential family; One Class classification", } @Article{Tan:2008:TMG, author = "Henry Tan and Fedja Hadzic and Tharam S. 
Dillon and Elizabeth Chang and Ling Feng", title = "Tree model guided candidate generation for mining frequent subtrees from {XML} documents", journal = j-TKDD, volume = "2", number = "2", pages = "9:1--9:??", month = jul, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1376815.1376818", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 17:59:30 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Due to the inherent flexibilities in both structure and semantics, XML association rules mining faces few challenges, such as: a more complicated hierarchical data structure and ordered data context. Mining frequent patterns from XML documents can be recast as mining frequent tree structures from a database of XML documents. In this study, we model a database of XML documents as a database of rooted labeled ordered subtrees. In particular, we are mainly concerned with mining frequent induced and embedded ordered subtrees. Our main contributions are as follows. We describe our unique {\em embedding list\/} representation of the tree structure, which enables efficient implementation of our {\em Tree Model Guided\/} ({\em TMG\/}) candidate generation. {\em TMG\/} is an optimal, nonredundant enumeration strategy that enumerates all the valid candidates that conform to the structural aspects of the data. We show through a mathematical model and experiments that {\em TMG\/} has better complexity compared to the commonly used join approach. In this article, we propose two algorithms, MB3-Miner and iMB3-Miner. MB3-Miner mines embedded subtrees. iMB3-Miner mines induced and/or embedded subtrees by using the {\em maximum level of embedding constraint}. 
Our experiments with both synthetic and real datasets against two well-known algorithms for mining induced and embedded subtrees, demonstrate the effectiveness and the efficiency of the proposed techniques.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "9", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "FREQT; TMG; Tree mining; tree model guided; TreeMiner", } @Article{Islam:2008:STS, author = "Aminul Islam and Diana Inkpen", title = "Semantic text similarity using corpus-based word similarity and string similarity", journal = j-TKDD, volume = "2", number = "2", pages = "10:1--10:??", month = jul, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1376815.1376819", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 17:59:30 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "We present a method for measuring the semantic similarity of texts using a corpus-based measure of semantic word similarity and a normalized and modified version of the Longest Common Subsequence (LCS) string matching algorithm. Existing methods for computing text similarity have focused mainly on either large documents or individual words. We focus on computing the similarity between two sentences or two short paragraphs. The proposed method can be exploited in a variety of applications involving textual knowledge representation and knowledge discovery. Evaluation results on two different data sets show that our method outperforms several competing methods.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "10", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "corpus-based measures; Semantic similarity of words; similarity of short texts", } @Article{Sun:2008:ITA, author = "Jimeng Sun and Dacheng Tao and Spiros Papadimitriou and Philip S. Yu and Christos Faloutsos", title = "Incremental tensor analysis: {Theory} and applications", journal = j-TKDD, volume = "2", number = "3", pages = "11:1--11:??", month = oct, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1409620.1409621", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 17:59:41 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "How do we find patterns in author-keyword associations, evolving over time? Or in data cubes (tensors), with product-branch-customer sales information? And more generally, how to summarize high-order data cubes (tensors)? How to incrementally update these patterns over time? Matrix decompositions, like principal component analysis (PCA) and variants, are invaluable tools for mining, dimensionality reduction, feature selection, rule identification in numerous settings like streaming data, text, graphs, social networks, and many more settings. However, they have only two orders (i.e., matrices, like author and keyword in the previous example).\par We propose to envision such higher-order data as tensors, and tap the vast literature on the topic. However, these methods do not necessarily scale up, let alone operate on semi-infinite streams. Thus, we introduce a general framework, incremental tensor analysis (ITA), which efficiently computes a compact summary for high-order and high-dimensional data, and also reveals the hidden correlations.
Three variants of ITA are presented: (1) dynamic tensor analysis (DTA); (2) streaming tensor analysis (STA); and (3) window-based tensor analysis (WTA). In particular, we explore several fundamental design trade-offs such as space efficiency, computational cost, approximation accuracy, time dependency, and model complexity.\par We implement all our methods and apply them in several real settings, such as network anomaly detection, multiway latent semantic indexing on citation networks, and correlation study on sensor measurements. Our empirical studies show that the proposed methods are fast and accurate and that they find interesting patterns and outliers on the real datasets.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "11", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "multilinear algebra; stream mining; Tensor", } @Article{Mangasarian:2008:PPC, author = "Olvi L. Mangasarian and Edward W. Wild and Glenn M. Fung", title = "Privacy-preserving classification of vertically partitioned data via random kernels", journal = j-TKDD, volume = "2", number = "3", pages = "12:1--12:??", month = oct, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1409620.1409622", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 17:59:41 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "We propose a novel privacy-preserving support vector machine (SVM) classifier for a data matrix $A$ whose input feature columns are divided into groups belonging to different entities. Each entity is unwilling to share its group of columns or make it public. Our classifier is based on the concept of a reduced kernel $ k(A, B \prime)$, where $ B \prime $ is the transpose of a random matrix $B$. 
The column blocks of $B$ corresponding to the different entities are privately generated by each entity and never made public. The proposed linear or nonlinear SVM classifier, which is public but does not reveal any of the privately held data, has accuracy comparable to that of an ordinary SVM classifier that uses the entire set of input features directly.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "12", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "Privacy preserving classification; support vector machines; vertically partitioned data", } @Article{Lakshmanan:2008:DRA, author = "Laks V. S. Lakshmanan and Raymond T. Ng and Ganesh Ramesh", title = "On disclosure risk analysis of anonymized itemsets in the presence of prior knowledge", journal = j-TKDD, volume = "2", number = "3", pages = "13:1--13:??", month = oct, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1409620.1409623", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 17:59:41 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Decision makers of companies often face the dilemma of whether to release data for knowledge discovery, vis-a-vis the risk of disclosing proprietary or sensitive information. Among the various methods employed for ``sanitizing'' the data prior to disclosure, we focus in this article on anonymization, given its widespread use in practice. We do due diligence to the question ``just how safe is the anonymized data?'' We consider both those scenarios when the hacker has no information and, more realistically, when the hacker may have partial information about items in the domain. 
We conduct our analyses in the context of frequent set mining and address the safety question at two different levels: (i) how likely of being cracked (i.e., re-identified by a hacker), are the identities of individual items and (ii) how likely are sets of items cracked? For capturing the prior knowledge of the hacker, we propose a {\em belief function}, which amounts to an educated guess of the frequency of each item. For various classes of belief functions which correspond to different degrees of prior knowledge, we derive formulas for computing the expected number of cracks of single items and for itemsets, the probability of cracking the itemsets. While obtaining, exact values for more general situations is computationally hard, we propose a series of heuristics called the {\em O-estimates}. They are easy to compute and are shown fairly accurate, justified by empirical results on real benchmark datasets. Based on the O-estimates, we propose a recipe for the decision makers to resolve their dilemma. Our recipe operates at two different levels, depending on whether the data owner wants to reason in terms of single items or sets of items (or both). Finally, we present techniques for ascertaining a hacker's knowledge of correlation in terms of co-occurrence of items likely. This information regarding the hacker's knowledge can be incorporated into our framework of disclosure risk analysis and we present experimental results demonstrating how this knowledge affects the heuristic estimates we have developed.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "13", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "anonymization; belief function; bipartite graphs; correlation; Disclosure risk; frequent itemsets; hacker; matching; prior knowledge; sampling", } @Article{Vaidya:2008:PPD, author = "Jaideep Vaidya and Chris Clifton and Murat Kantarcioglu and A. 
Scott Patterson", title = "Privacy-preserving decision trees over vertically partitioned data", journal = j-TKDD, volume = "2", number = "3", pages = "14:1--14:??", month = oct, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1409620.1409624", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 17:59:41 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Privacy and security concerns can prevent sharing of data, derailing data-mining projects. Distributed knowledge discovery, if done correctly, can alleviate this problem. We introduce a generalized privacy-preserving variant of the ID3 algorithm for vertically partitioned data distributed over two or more parties. Along with a proof of security, we discuss what would be necessary to make the protocols completely secure. We also provide experimental results, giving a first demonstration of the practical complexity of secure multiparty computation-based data mining.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "14", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "Decision tree classification; privacy", } @Article{Chuang:2009:FPS, author = "Kun-Ta Chuang and Hung-Leng Chen and Ming-Syan Chen", title = "Feature-preserved sampling over streaming data", journal = j-TKDD, volume = "2", number = "4", pages = "15:1--15:??", month = jan, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1460797.1460798", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 17:59:51 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "In this article, we explore a novel sampling model, called {\em feature preserved sampling\/} ({\em FPS\/}) that sequentially generates a high-quality sample over sliding windows. The sampling quality we consider refers to the degree of consistency between the sample proportion and the population proportion of each attribute value in a window. Due to the time-variant nature of real-world datasets, users are more likely to be interested in the most recent data. However, previous works have not been able to generate a high-quality sample over sliding windows that precisely preserves up-to-date population characteristics. 
Motivated by this shortcoming, we have developed the {\em FPS\/} algorithm, which has several advantages: (1) it sequentially generates a sample from a time-variant data source over sliding windows; (2) the execution time of {\em FPS\/} is linear with respect to the database size; (3) the {\em relative\/} proportional differences between the sample proportions and population proportions of most distinct attribute values are guaranteed to be below a specified error threshold, $ \epsilon $, while the {\em relative\/} proportion differences of the remaining attribute values are as close to $ \epsilon $ as possible, which ensures that the generated sample is of high quality; (4) the sample rate is close to the user specified rate so that a high quality sampling result can be obtained without increasing the sample size; (5) by a thorough analytical and empirical study, we prove that {\em FPS\/} has acceptable space overheads, especially when the attribute values have Zipfian distributions, and {\em FPS\/} can also excellently preserve the population proportion of multivariate features in the sample; and (6) {\em FPS\/} can be applied to infinite streams and finite datasets equally, and the generated samples can be used for various applications. Our experiments on both real and synthetic data validate that {\em FPS\/} can effectively obtain a high quality sample of the desired size. In addition, while using the sample generated by {\em FPS\/} in various mining applications, a significant improvement in efficiency can be achieved without compromising the model's precision.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "15", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "sampling; Streaming mining", } @Article{Jiang:2009:MFC, author = "Daxin Jiang and Jian Pei", title = "Mining frequent cross-graph quasi-cliques", journal = j-TKDD, volume = "2", number = "4", pages = "16:1--16:??", month = jan, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1460797.1460799", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 17:59:51 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Joint mining of multiple datasets can often discover interesting, novel, and reliable patterns which cannot be obtained solely from any single source. For example, in bioinformatics, jointly mining multiple gene expression datasets obtained by different labs or during various biological processes may overcome the heavy noise in the data. Moreover, by joint mining of gene expression data and protein-protein interaction data, we may discover clusters of genes which show coherent expression patterns and also produce interacting proteins. Such clusters may be potential pathways.\par In this article, we investigate a novel data mining problem, {\em mining frequent cross-graph quasi-cliques}, which is generalized from several interesting applications in bioinformatics, cross-market customer segmentation, social network analysis, and Web mining. In a graph, a set of vertices $S$ is a $ \gamma $-quasi-clique $ (0 < \gamma \leq 1)$ if each vertex $v$ in $S$ directly connects to at least $ \gamma \cdot (|S| - 1)$ other vertices in $S$. 
Given a set of graphs $ G_1, \ldots {}, G_n$ and parameter $ {\rm min \_ sup} (0 < {\rm min \_ sup} \leq 1)$, a set of vertices $S$ is a frequent cross-graph quasi-clique if $S$ is a $ \gamma $-quasi-clique in at least $ {\rm min \_ sup} \cdot n$ graphs, and there does not exist a proper superset of $S$ having the property.\par We build a general model, show why the complete set of frequent cross-graph quasi-cliques cannot be found by previous data mining methods, and study the complexity of the problem. While the problem is difficult, we develop practical algorithms which exploit several interesting and effective techniques and heuristics to efficaciously mine frequent cross-graph quasi-cliques. A systematic performance study is reported on both synthetic and real data sets. We demonstrate some interesting and meaningful frequent cross-graph quasi-cliques in bioinformatics. The experimental results also show that our algorithms are efficient and scalable.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "16", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "bioinformatics; clique; Graph mining; joint mining", } @Article{Domeniconi:2009:WCE, author = "Carlotta Domeniconi and Muna Al-Razgan", title = "Weighted cluster ensembles: {Methods} and analysis", journal = j-TKDD, volume = "2", number = "4", pages = "17:1--17:??", month = jan, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1460797.1460800", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 17:59:51 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Cluster ensembles offer a solution to challenges inherent to clustering arising from its ill-posed nature.
Cluster ensembles can provide robust and stable solutions by leveraging the consensus across multiple clustering results, while averaging out emergent spurious structures that arise due to the various biases to which each participating algorithm is tuned. In this article, we address the problem of combining multiple {\em weighted clusters\/} that belong to different subspaces of the input space. We leverage the diversity of the input clusterings in order to generate a consensus partition that is superior to the participating ones. Since we are dealing with weighted clusters, our consensus functions make use of the weight vectors associated with the clusters. We demonstrate the effectiveness of our techniques by running experiments with several real datasets, including high-dimensional text data. Furthermore, we investigate in depth the issue of diversity and accuracy for our ensemble methods. Our analysis and experimental results show that the proposed techniques are capable of producing a partition that is as good as or better than the best individual clustering.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "17", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "accuracy and diversity measures; Cluster ensembles; consensus functions; data mining; subspace clustering; text data", } @Article{Zhang:2009:DGA, author = "Zhenjie Zhang and Laks V. S. Lakshmanan and Anthony K. H. 
Tung", title = "On domination game analysis for microeconomic data mining", journal = j-TKDD, volume = "2", number = "4", pages = "18:1--18:??", month = jan, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1460797.1460801", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 17:59:51 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Game theory is a powerful tool for analyzing the competitions among manufacturers in a market. In this article, we present a study on combining game theory and data mining by introducing the concept of domination game analysis. We present a multidimensional market model, where every dimension represents one attribute of a commodity. Every product or customer is represented by a point in the multidimensional space, and a product is said to ``dominate'' a customer if all of its attributes can satisfy the requirements of the customer. The expected market share of a product is measured by the expected number of the buyers in the customers, all of which are equally likely to buy any product dominating him. A Nash equilibrium is a configuration of the products achieving stable expected market shares for all products. We prove that Nash equilibrium in such a model can be computed in polynomial time if every manufacturer tries to modify its product in a round robin manner. To further improve the efficiency of the computation, we also design two algorithms for the manufacturers to efficiently find their best response to other products in the market.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "18", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "data mining; Domination game; game theory", } @Article{Kriegel:2009:CHD, author = "Hans-Peter Kriegel and Peer Kr{\"o}ger and Arthur Zimek", title = "Clustering high-dimensional data: a survey on subspace clustering, pattern-based clustering, and correlation clustering", journal = j-TKDD, volume = "3", number = "1", pages = "1:1--1:??", month = mar, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1497577.1497578", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 18:00:01 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "As a prolific research area in data mining, subspace clustering and related problems induced a vast quantity of proposed solutions. However, many publications compare a new proposition --- if at all --- with one or two competitors, or even with a so-called ``na{\"\i}ve'' ad hoc solution, but fail to clarify the exact problem definition. As a consequence, even if two solutions are thoroughly compared experimentally, it will often remain unclear whether both solutions tackle the same problem or, if they do, whether they agree in certain tacit assumptions and how such assumptions may influence the outcome of an algorithm. In this survey, we try to clarify: (i) the different problem definitions related to subspace clustering in general; (ii) the specific difficulties encountered in this field of research; (iii) the varying assumptions, heuristics, and intuitions forming the basis of different approaches; and (iv) how several prominent solutions tackle different problems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "1", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "clustering; high-dimensional data; Survey", } @Article{Dhurandhar:2009:SAM, author = "Amit Dhurandhar and Alin Dobra", title = "Semi-analytical method for analyzing models and model selection measures based on moment analysis", journal = j-TKDD, volume = "3", number = "1", pages = "2:1--2:??", month = mar, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1497577.1497579", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 18:00:01 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "In this article we propose a moment-based method for studying models and model selection measures. By focusing on the probabilistic space of classifiers induced by the classification algorithm rather than on that of datasets, we obtain efficient characterizations for computing the moments, which is followed by visualization of the resulting formulae that are too complicated for direct interpretation. By assuming the data to be drawn independently and identically distributed from the underlying probability distribution, and by going over the space of all possible datasets, we establish general relationships between the generalization error, hold-out-set error, cross-validation error, and leave-one-out error. We later exemplify the method and the results by studying the behavior of the errors for the naive Bayes classifier.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "2", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "classification; generalization error; Model selection", } @Article{Cerf:2009:CPM, author = "Lo{\"\i}c Cerf and J{\'e}r{\'e}my Besson and C{\'e}line Robardet and Jean-Fran{\c{c}}ois Boulicaut", title = "Closed patterns meet $n$-ary relations", journal = j-TKDD, volume = "3", number = "1", pages = "3:1--3:??", month = mar, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1497577.1497580", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 18:00:01 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Set pattern discovery from binary relations has been extensively studied during the last decade. In particular, many complete and efficient algorithms for frequent closed set mining are now available. Generalizing such a task to $n$-ary relations ($ n \geq 2$) appears as a timely challenge. It may be important for many applications, for example, when adding the time dimension to the popular {\em objects\/} $ \times $ {\em features\/} binary case. The generality of the task (no assumption being made on the relation arity or on the size of its attribute domains) makes it computationally challenging. We introduce an algorithm called Data-Peeler. From an $n$-ary relation, it extracts all closed $n$-sets satisfying given piecewise (anti) monotonic constraints. This new class of constraints generalizes both monotonic and antimonotonic constraints. Considering the special case of ternary relations, Data-Peeler outperforms the state-of-the-art algorithms CubeMiner and Trias by orders of magnitude. These good performances must be granted to a new clever enumeration strategy allowing to efficiently enforce the closeness property. 
The relevance of the extracted closed $n$-sets is assessed on real-life 3- and 4-ary relations. Beyond natural 3- or 4-ary relations, expanding a relation with an additional attribute can help in enforcing rather abstract constraints such as the robustness with respect to binarization. Furthermore, a collection of closed $n$-sets is shown to be an excellent starting point to compute a tiling of the dataset.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "3", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "$n$-ary relations; Closed patterns; constraint properties; constraint-based mining; tiling", } @Article{Angiulli:2009:DEA, author = "Fabrizio Angiulli and Fabio Fassetti", title = "{DOLPHIN}: an efficient algorithm for mining distance-based outliers in very large datasets", journal = j-TKDD, volume = "3", number = "1", pages = "4:1--4:??", month = mar, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1497577.1497581", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 18:00:01 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "In this work a novel distance-based outlier detection algorithm, named DOLPHIN, working on disk-resident datasets and whose I/O cost corresponds to the cost of sequentially reading the input dataset file twice, is presented.\par It is both theoretically and empirically shown that the main memory usage of DOLPHIN amounts to a small fraction of the dataset and that DOLPHIN has linear time performance with respect to the dataset size. DOLPHIN gains efficiency by naturally merging together in a unified schema three strategies, namely the selection policy of objects to be maintained in main memory, usage of pruning rules, and similarity search techniques.
Importantly, similarity search is accomplished by the algorithm without the need of preliminarily indexing the whole dataset, as other methods do.\par The algorithm is simple to implement and it can be used with any type of data, belonging to either metric or nonmetric spaces. Moreover, a modification to the basic method allows DOLPHIN to deal with the scenario in which the available buffer of main memory is smaller than its standard requirements. DOLPHIN has been compared with state-of-the-art distance-based outlier detection algorithms, showing that it is much more efficient.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "4", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "Data mining; distance-based outliers; outlier detection", } @Article{Chen:2009:BAS, author = "Bee-Chung Chen and Raghu Ramakrishnan and Jude W. Shavlik and Pradeep Tamma", title = "Bellwether analysis: {Searching} for cost-effective query-defined predictors in large databases", journal = j-TKDD, volume = "3", number = "1", pages = "5:1--5:??", month = mar, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1497577.1497582", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 18:00:01 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "How to mine massive datasets is a challenging problem with great potential value. Motivated by this challenge, much effort has concentrated on developing scalable versions of machine learning algorithms. 
However, the cost of mining large datasets is not just computational; preparing the datasets into the ``right form'' so that learning algorithms can be applied is usually costly, due to the human labor that is typically required and a large number of choices in data preparation, which include selecting different subsets of data and aggregating data at different granularities. We make the key observation that, for a number of practically motivated problems, these choices can be defined using database queries and analyzed in an automatic and systematic manner. Specifically, we propose a new class of data-mining problem, called {\em bellwether analysis}, in which the goal is to find a few query-defined predictors (e.g., first week sales of Peoria, IL of an item) that can be used to accurately predict the result of a target query (e.g., first year worldwide sales of the item) from a large number of queries that define candidate predictors. To make a prediction for a new item, the data needed to generate such predictors has to be collected (e.g., selling the new item in Peoria, IL for a week and collecting the sales data). A useful predictor is one that has high prediction accuracy and a low data-collection cost. We call such a cost-effective predictor a {\em bellwether}.\par This article introduces bellwether analysis, which integrates database query processing and predictive modeling into a single framework, and provides scalable algorithms for large datasets that cannot fit in main memory. Through a series of extensive experiments, we show that bellwethers do exist in real-world databases, and that our computation techniques achieve good efficiency on large datasets.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "5", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "bellwether; Cost-effective prediction; data cube; OLAP queries; predictive models; scalable algorithms", } @Article{Liu:2009:ISI, author = "Huan Liu and John Salerno and Michael Young and Rakesh Agrawal and Philip S. Yu", title = "Introduction to special issue on social computing, behavioral modeling, and prediction", journal = j-TKDD, volume = "3", number = "2", pages = "6:1--6:??", month = apr, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1514888.1514889", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 18:00:12 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "6", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Mehler:2009:ENC, author = "Andrew Mehler and Steven Skiena", title = "Expanding network communities from representative examples", journal = j-TKDD, volume = "3", number = "2", pages = "7:1--7:??", month = apr, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1514888.1514890", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 18:00:12 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "We present an approach to leverage a small subset of a coherent community within a social network into a much larger, more representative sample. Our problem becomes identifying a small conductance subgraph containing many (but not necessarily all) members of the given seed set. 
Starting with an initial seed set representing a sample of a community, we seek to discover as much of the full community as possible.\par We present a general method for network community expansion, demonstrating that our methods work well in expanding communities in real world networks starting from small given seed groups (20 to 400 members). Our approach is marked by incremental expansion from the seeds with retrospective analysis to determine the ultimate boundaries of our community. We demonstrate how to increase the robustness of the general approach through bootstrapping multiple random partitions of the input set into seed and evaluation groups.\par We go beyond statistical comparisons against gold standards to careful subjective evaluations of our expanded communities. This process explains the causes of most disagreement between our expanded communities and our gold-standards --- arguing that our expansion methods provide more reliable communities than can be extracted from reference sources/gazetteers such as Wikipedia.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "7", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "artificial intelligence; community discovery; Discrete mathematics; graph theory; news analysis; social networks", } @Article{Lin:2009:ACT, author = "Yu-Ru Lin and Yun Chi and Shenghuo Zhu and Hari Sundaram and Belle L. 
Tseng", title = "Analyzing communities and their evolutions in dynamic social networks", journal = j-TKDD, volume = "3", number = "2", pages = "8:1--8:??", month = apr, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1514888.1514891", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 18:00:12 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "We discover communities from social network data and analyze the community evolution. These communities are inherent characteristics of human interaction in online social networks, as well as paper citation networks. Also, communities may evolve over time, due to changes to individuals' roles and social status in the network as well as changes to individuals' research interests. We present an innovative algorithm that deviates from the traditional two-step approach to analyze community evolutions. In the traditional approach, communities are first detected for each time slice, and then compared to determine correspondences. We argue that this approach is inappropriate in applications with noisy data. In this paper, we propose {\em FacetNet\/} for analyzing communities and their evolutions through a robust {\em unified\/} process. This novel framework will discover communities and capture their evolution with temporal smoothness given by historic community structures. Our approach relies on formulating the problem in terms of maximum a posteriori (MAP) estimation, where the community structure is estimated both by the observed networked data and by the prior distribution given by historic community structures. Then we develop an iterative algorithm, with proven low time complexity, which is guaranteed to converge to an optimal solution. 
We perform extensive experimental studies, on both synthetic datasets and real datasets, to demonstrate that our method discovers meaningful communities and provides additional insights not directly obtainable from traditional methods.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "8", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "Community; community net; evolution; evolution net; nonnegative matrix factorization; soft membership", } @Article{Kimura:2009:BLM, author = "Masahiro Kimura and Kazumi Saito and Hiroshi Motoda", title = "Blocking links to minimize contamination spread in a social network", journal = j-TKDD, volume = "3", number = "2", pages = "9:1--9:??", month = apr, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1514888.1514892", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 18:00:12 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "We address the problem of minimizing the propagation of undesirable things, such as computer viruses or malicious rumors, by blocking a limited number of links in a network, which is converse to the influence maximization problem in which the most influential nodes for information diffusion is searched in a social network. This minimization problem is more fundamental than the problem of preventing the spread of contamination by removing nodes in a network. We introduce two definitions for the contamination degree of a network, accordingly define two contamination minimization problems, and propose methods for efficiently finding good approximate solutions to these problems on the basis of a naturally greedy strategy. Using large social networks, we experimentally demonstrate that the proposed methods outperform conventional link-removal methods. 
We also show that unlike the case of blocking a limited number of nodes, the strategy of removing nodes with high out-degrees is not necessarily effective for these problems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "9", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "Contamination diffusion; link analysis; social networks", } @Article{Agichtein:2009:MIS, author = "Eugene Agichtein and Yandong Liu and Jiang Bian", title = "Modeling information-seeker satisfaction in community question answering", journal = j-TKDD, volume = "3", number = "2", pages = "10:1--10:??", month = apr, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1514888.1514893", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Fri Apr 24 18:00:12 MDT 2009", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Question Answering Communities such as Naver, Baidu Knows, and Yahoo! Answers have emerged as popular, and often effective, means of information seeking on the web. By posting questions for other participants to answer, information seekers can obtain specific answers to their questions. Users of CQA portals have already contributed millions of questions, and received hundreds of millions of answers from other participants. However, CQA is not always effective: in some cases, a user may obtain a perfect answer within minutes, and in others it may require hours --- and sometimes days --- until a satisfactory answer is contributed. We investigate the problem of predicting information seeker satisfaction in collaborative question answering communities, where we attempt to predict whether a question author will be satisfied with the answers submitted by the community participants. 
We present a general prediction model, and develop a variety of content, structure, and community-focused features for this task. Our experimental results, obtained from a large-scale evaluation over thousands of real questions and user ratings, demonstrate the feasibility of modeling and predicting asker satisfaction. We complement our results with a thorough investigation of the interactions and information seeking patterns in question answering communities that correlate with information seeker satisfaction. We also explore {\em personalized\/} models of asker satisfaction, and show that when sufficient interaction history exists, personalization can significantly improve prediction accuracy over a ``one-size-fits-all'' model. Our models and predictions could be useful for a variety of applications, such as user intent inference, answer ranking, interface design, and query suggestion and routing.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "10", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "Community question answering; information seeker satisfaction", } @Article{Torvik:2009:AND, author = "Vetle I. Torvik and Neil R. Smalheiser", title = "Author name disambiguation in {MEDLINE}", journal = j-TKDD, volume = "3", number = "3", pages = "11:1--11:??", month = jul, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1552303.1552304", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Tue Mar 16 18:36:58 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "{\em Background\/}: We recently described ``Author-ity,'' a model for estimating the probability that two articles in MEDLINE, sharing the same author name, were written by the same individual. 
Features include shared title words, journal name, coauthors, medical subject headings, language, affiliations, and author name features (middle initial, suffix, and prevalence in MEDLINE). Here we test the hypothesis that the Author-ity model will suffice to disambiguate author names for the vast majority of articles in MEDLINE. {\em Methods\/}: Enhancements include: (a) incorporating first names and their variants, email addresses, and correlations between specific last names and affiliation words; (b) new methods of generating large unbiased training sets; (c) new methods for estimating the prior probability; (d) a weighted least squares algorithm for correcting transitivity violations; and (e) a maximum likelihood based agglomerative algorithm for computing clusters of articles that represent inferred author-individuals. {\em Results\/}: Pairwise comparisons were computed for all author names on all 15.3 million articles in MEDLINE (2006 baseline), that share last name and first initial, to create Author-ity 2006, a database that has each name on each article assigned to one of 6.7 million inferred author-individual clusters. Recall is estimated at $ \approx 98.8 \% $. Lumping (putting two different individuals into the same cluster) affects $ \approx 0.5 \% $ of clusters, whereas splitting (assigning articles written by the same individual to $ > 1 $ cluster) affects $ \approx 2 \% $ of articles. {\em Impact\/}: The Author-ity model can be applied generally to other bibliographic databases. Author name disambiguation allows information retrieval and data integration to become {\em person-centered}, not just {\em document-centered}, setting the stage for new data mining and social network tools that will facilitate the analysis of scholarly publishing and collaboration behavior. 
{\em Availability\/}: The Author-ity 2006 database is available for nonprofit academic research, and can be freely queried via http://arrowsmith.psych.uic.edu.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "11", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "bibliographic databases; Name disambiguation", } @Article{Tu:2009:SDC, author = "Li Tu and Yixin Chen", title = "Stream data clustering based on grid density and attraction", journal = j-TKDD, volume = "3", number = "3", pages = "12:1--12:??", month = jul, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1552303.1552305", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Tue Mar 16 18:36:58 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Clustering real-time stream data is an important and challenging problem. Existing algorithms such as CluStream are based on the {\em k\/}-means algorithm. These clustering algorithms have difficulties finding clusters of arbitrary shapes and handling outliers. Further, they require the knowledge of {\em k\/} and user-specified time window. To address these issues, this article proposes {\em D-Stream}, a framework for clustering stream data using a density-based approach.\par Our algorithm uses an online component that maps each input data record into a grid and an offline component that computes the grid density and clusters the grids based on the density. The algorithm adopts a density decaying technique to capture the dynamic changes of a data stream and a attraction-based mechanism to accurately generate cluster boundaries.\par Exploiting the intricate relationships among the decay factor, attraction, data density, and cluster structure, our algorithm can efficiently and effectively generate and adjust the clusters in real time.
Further, a theoretically sound technique is developed to detect and remove sporadic grids mapped by outliers in order to dramatically improve the space and time efficiency of the system. The technique makes high-speed data stream clustering feasible without degrading the clustering quality. The experimental results show that our algorithm has superior quality and efficiency, can find clusters of arbitrary shapes, and can accurately recognize the evolving behaviors of real-time data streams.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "12", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "clustering; data mining; density-based algorithms; Stream data", } @Article{Zhou:2009:LST, author = "Bin Zhou and Jian Pei", title = "Link spam target detection using page farms", journal = j-TKDD, volume = "3", number = "3", pages = "13:1--13:??", month = jul, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1552303.1552306", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Tue Mar 16 18:36:58 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Currently, most popular Web search engines adopt some link-based ranking methods such as PageRank. Driven by the huge potential benefit of improving rankings of Web pages, many tricks have been attempted to boost page rankings. The most common way, which is known as link spam, is to make up some artificially designed link structures. Detecting link spam effectively is a big challenge. In this article, we develop novel and effective detection methods for link spam target pages using page farms. The essential idea is intuitive: whether a page is the beneficiary of link spam is reflected by how it collects its PageRank score. 
Technically, how a target page collects its PageRank score is modeled by a page farm, which consists of pages contributing a major portion of the PageRank score of the target page. We propose two spamicity measures based on page farms. They can be used as an effective measure to check whether the pages are link spam target pages. An empirical study using a newly available real dataset strongly suggests that our method is effective. It outperforms the state-of-the-art methods like SpamRank and SpamMass in both precision and recall.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "13", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "Link Spam; Page Farm; PageRank", } @Article{Wan:2009:DBC, author = "Li Wan and Wee Keong Ng and Xuan Hong Dang and Philip S. Yu and Kuan Zhang", title = "Density-based clustering of data streams at multiple resolutions", journal = j-TKDD, volume = "3", number = "3", pages = "14:1--14:??", month = jul, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1552303.1552307", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Tue Mar 16 18:36:58 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "In data stream clustering, it is desirable to have algorithms that are able to detect clusters of arbitrary shape, clusters that evolve over time, and clusters with noise. Existing stream data clustering algorithms are generally based on an online-offline approach: The online component captures synopsis information from the data stream (thus, overcoming real-time and memory constraints) and the offline component generates clusters using the stored synopsis. 
The online-offline approach affects the overall performance of stream data clustering in various ways: the ease of deriving synopsis from streaming data; the complexity of data structure for storing and managing synopsis; and the frequency at which the offline component is used to generate clusters. In this article, we propose an algorithm that (1) computes and updates synopsis information in constant time; (2) allows users to discover clusters at multiple resolutions; (3) determines the right time for users to generate clusters from the synopsis information; (4) generates clusters of higher purity than existing algorithms; and (5) determines the right threshold function for density-based clustering based on the fading model of stream data. To the best of our knowledge, no existing data stream algorithms has all of these features. Experimental results show that our algorithm is able to detect arbitrarily shaped, evolving clusters with high quality.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "14", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "Data mining algorithms; density based clustering; evolving data streams", } @Article{Mannila:2009:ATS, author = "Heikki Mannila and Dimitrios Gunopulos", title = "{ACM TKDD} special issue {ACM SIGKDD 2007} and {ACM SIGKDD 2008}", journal = j-TKDD, volume = "3", number = "4", pages = "15:1--15:??", month = nov, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1631162.1631163", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Tue Mar 16 18:37:13 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "15", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Asur:2009:EBF, author = "Sitaram Asur and Srinivasan Parthasarathy and Duygu Ucar", title = "An event-based framework for characterizing the evolutionary behavior of interaction graphs", journal = j-TKDD, volume = "3", number = "4", pages = "16:1--16:??", month = nov, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1631162.1631164", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Tue Mar 16 18:37:13 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Interaction graphs are ubiquitous in many fields such as bioinformatics, sociology and physical sciences. There have been many studies in the literature targeted at studying and mining these graphs. However, almost all of them have studied these graphs from a static point of view. The study of the evolution of these graphs over time can provide tremendous insight on the behavior of entities, communities and the flow of information among them. In this work, we present an event-based characterization of critical behavioral patterns for temporally varying interaction graphs. We use nonoverlapping snapshots of interaction graphs and develop a framework for capturing and identifying interesting events from them. We use these events to characterize complex behavioral patterns of individuals and communities over time. We show how semantic information can be incorporated to reason about community-behavior events. We also demonstrate the application of behavioral patterns for the purposes of modeling evolution, link prediction and influence maximization. Finally, we present a diffusion model for evolving networks, based on our framework.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "16", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "diffusion of innovations; Dynamic interaction networks; evolutionary analysis", } @Article{Chi:2009:ESC, author = "Yun Chi and Xiaodan Song and Dengyong Zhou and Koji Hino and Belle L. Tseng", title = "On evolutionary spectral clustering", journal = j-TKDD, volume = "3", number = "4", pages = "17:1--17:??", month = nov, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1631162.1631165", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Tue Mar 16 18:37:13 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Evolutionary clustering is an emerging research area essential to important applications such as clustering dynamic Web and blog contents and clustering data streams. In evolutionary clustering, a good clustering result should fit the current data well, while simultaneously not deviate too dramatically from the recent history. To fulfill this dual purpose, a measure of {\em temporal smoothness\/} is integrated in the overall measure of clustering quality. In this article, we propose two frameworks that incorporate temporal smoothness in evolutionary spectral clustering. For both frameworks, we start with intuitions gained from the well-known {\em k\/}-means clustering problem, and then propose and solve corresponding cost functions for the evolutionary spectral clustering problems. Our solutions to the evolutionary spectral clustering problems provide more stable and consistent clustering results that are less sensitive to short-term noises while at the same time are adaptive to long-term cluster drifts. Furthermore, we demonstrate that our methods provide the optimal solutions to the relaxed versions of the corresponding evolutionary {\em k\/}-means clustering problems.
Performance experiments over a number of real and synthetic data sets illustrate our evolutionary spectral clustering methods provide more robust clustering results that are not sensitive to noise and can adapt to data drifts.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "17", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "Evolutionary spectral clustering; preserving cluster membership; preserving cluster quality; temporal smoothness", } @Article{Fujiwara:2009:FLS, author = "Yasuhiro Fujiwara and Yasushi Sakurai and Masaru Kitsuregawa", title = "Fast likelihood search for hidden {Markov} models", journal = j-TKDD, volume = "3", number = "4", pages = "18:1--18:??", month = nov, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1631162.1631166", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Tue Mar 16 18:37:13 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Hidden Markov models (HMMs) are receiving considerable attention in various communities and many applications that use HMMs have emerged such as mental task classification, biological analysis, traffic monitoring, and anomaly detection. This article has two goals; The first goal is exact and efficient identification of the model whose state sequence has the highest likelihood for the given query sequence (more precisely, no HMM that actually has a high-probability path for the given sequence is missed by the algorithm), and the second goal is exact and efficient monitoring of streaming data sequences to find the best model. We propose SPIRAL, a fast search method for HMM datasets. 
SPIRAL is based on three ideas; (1) it clusters states of models to compute approximate likelihood, (2) it uses several granularities and approximates likelihood values in search processing, and (3) it focuses on just the promising likelihood computations by pruning out low-likelihood state sequences. Experiments verify the effectiveness of SPIRAL and show that it is more than 490 times faster than the naive method.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "18", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "Hidden Markov model; likelihood; upper bound", } @Article{Zhang:2009:EAG, author = "Xiang Zhang and Fei Zou and Wei Wang", title = "Efficient algorithms for genome-wide association study", journal = j-TKDD, volume = "3", number = "4", pages = "19:1--19:??", month = nov, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1631162.1631167", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Tue Mar 16 18:37:13 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Studying the association between quantitative phenotype (such as height or weight) and single nucleotide polymorphisms (SNPs) is an important problem in biology. To understand underlying mechanisms of complex phenotypes, it is often necessary to consider joint genetic effects across multiple SNPs. ANOVA (analysis of variance) test is routinely used in association study. Important findings from studying gene-gene (SNP-pair) interactions are appearing in the literature. However, the number of SNPs can be up to millions. Evaluating joint effects of SNPs is a challenging task even for SNP-pairs. 
Moreover, with large number of SNPs correlated, permutation procedure is preferred over simple Bonferroni correction for properly controlling family-wise error rate and retaining mapping power, which dramatically increases the computational cost of association study.\par In this article, we study the problem of finding SNP-pairs that have significant associations with a given quantitative phenotype. We propose an efficient algorithm, FastANOVA, for performing ANOVA tests on SNP-pairs in a batch mode, which also supports large permutation test. We derive an upper bound of SNP-pair ANOVA test, which can be expressed as the sum of two terms. The first term is based on single-SNP ANOVA test. The second term is based on the SNPs and independent of any phenotype permutation. Furthermore, SNP-pairs can be organized into groups, each of which shares a common upper bound. This allows for maximum reuse of intermediate computation, efficient upper bound estimation, and effective SNP-pair pruning. Consequently, FastANOVA only needs to perform the ANOVA test on a small number of candidate SNP-pairs without the risk of missing any significant ones. Extensive experiments demonstrate that FastANOVA is orders of magnitude faster than the brute-force implementation of ANOVA tests on all SNP pairs. The principles used in FastANOVA can be applied to categorical phenotypes and other statistics such as Chi-square test.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "19", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "ANOVA test; Association study; permutation test", } @Article{Bilgic:2009:RCM, author = "Mustafa Bilgic and Lise Getoor", title = "Reflect and correct: a misclassification prediction approach to active inference", journal = j-TKDD, volume = "3", number = "4", pages = "20:1--20:??", month = nov, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1631162.1631168", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Tue Mar 16 18:37:13 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Information diffusion, viral marketing, graph-based semi-supervised learning, and collective classification all attempt to model and exploit the relationships among nodes in a network to improve the performance of node labeling algorithms. However, sometimes the advantage of exploiting the relationships can become a disadvantage. Simple models like label propagation and iterative classification can aggravate a misclassification by propagating mistakes in the network, while more complex models that define and optimize a global objective function, such as Markov random fields and graph mincuts, can misclassify a set of nodes jointly. This problem can be mitigated if the classification system is allowed to ask for the correct labels for a few of the nodes during inference. However, determining the optimal set of labels to acquire is intractable under relatively general assumptions, which forces us to resort to approximate and heuristic techniques. We describe three such techniques in this article. The first one is based on directly approximating the value of the objective function of label acquisition and greedily acquiring the label that provides the most improvement. 
The second technique is a simple technique based on the analogy we draw between viral marketing and label acquisition. Finally, we propose a method, which we refer to as {\em reflect and correct}, that can learn and predict when the classification system is likely to make mistakes and suggests acquisitions to correct those mistakes. We empirically show on a variety of synthetic and real-world datasets that the reflect and correct method significantly outperforms the other two techniques, as well as other approaches based on network structural measures such as node degree and network clustering.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "20", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "Active inference; collective classification; information diffusion; label acquisition; viral marketing", } @Article{Kiernan:2009:CCS, author = "Jerry Kiernan and Evimaria Terzi", title = "Constructing comprehensive summaries of large event sequences", journal = j-TKDD, volume = "3", number = "4", pages = "21:1--21:??", month = nov, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1631162.1631169", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Tue Mar 16 18:37:13 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Event sequences capture system and user activity over time. Prior research on sequence mining has mostly focused on discovering local patterns appearing in a sequence. While interesting, these patterns do not give a comprehensive summary of the entire event sequence. Moreover, the number of patterns discovered can be large. 
In this article, we take an alternative approach and build {\em short\/} summaries that describe an entire sequence, and discover local dependencies between event types.\par We formally define the summarization problem as an optimization problem that balances shortness of the summary with accuracy of the data description. We show that this problem can be solved optimally in polynomial time by using a combination of two dynamic-programming algorithms. We also explore more efficient greedy alternatives and demonstrate that they work well on large datasets. Experiments on both synthetic and real datasets illustrate that our algorithms are efficient and produce high-quality results, and reveal interesting local structures in the data.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "21", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "Event sequences; log mining; summarization", } @Article{Koren:2010:FNS, author = "Yehuda Koren", title = "Factor in the neighbors: {Scalable} and accurate collaborative filtering", journal = j-TKDD, volume = "4", number = "1", pages = "1:1--1:??", month = jan, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1644873.1644874", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Tue Mar 16 18:37:37 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Recommender systems provide users with personalized suggestions for products or services. These systems often rely on collaborating filtering (CF), where past transactions are analyzed in order to establish connections between users and products. The most common approach to CF is based on neighborhood models, which originate from similarities between products or users. In this work we introduce a new neighborhood model with an improved prediction accuracy. 
Unlike previous approaches that are based on heuristic similarities, we model neighborhood relations by minimizing a global cost function. Further accuracy improvements are achieved by extending the model to exploit both explicit and implicit feedback by the users. Past models were limited by the need to compute all pairwise similarities between items or users, which grow quadratically with input size. In particular, this limitation vastly complicates adopting user similarity models, due to the typical large number of users. Our new model solves these limitations by factoring the neighborhood model, thus making both item-item and user-user implementations scale linearly with the size of the data. The methods are tested on the Netflix data, with encouraging results.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "1", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "collaborative filtering; Netflix Prize; Recommender systems", } @Article{Syed:2010:MDP, author = "Zeeshan Syed and Collin Stultz and Manolis Kellis and Piotr Indyk and John Guttag", title = "Motif discovery in physiological datasets: a methodology for inferring predictive elements", journal = j-TKDD, volume = "4", number = "1", pages = "2:1--2:??", month = jan, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1644873.1644875", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Tue Mar 16 18:37:37 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "In this article, we propose a methodology for identifying predictive physiological patterns in the absence of prior knowledge. We use the principle of conservation to identify activity that consistently precedes an outcome in patients, and describe a two-stage process that allows us to efficiently search for such patterns in large datasets. 
This involves first transforming continuous physiological signals from patients into symbolic sequences, and then searching for patterns in these reduced representations that are strongly associated with an outcome.\par Our strategy of identifying conserved activity that is unlikely to have occurred purely by chance in symbolic data is analogous to the discovery of regulatory motifs in genomic datasets. We build upon existing work in this area, generalizing the notion of a regulatory motif and enhancing current techniques to operate robustly on non-genomic data. We also address two significant considerations associated with motif discovery in general: computational efficiency and robustness in the presence of degeneracy and noise. To deal with these issues, we introduce the concept of active regions and new subset-based techniques such as a two-layer Gibbs sampling algorithm. These extensions allow for a framework for information inference, where precursors are identified as approximately conserved activity of arbitrary complexity preceding multiple occurrences of an event.\par We evaluated our solution on a population of patients who experienced sudden cardiac death and attempted to discover electrocardiographic activity that may be associated with the endpoint of death. To assess the predictive patterns discovered, we compared likelihood scores for motifs in the sudden death population against control populations of normal individuals and those with non-fatal supraventricular arrhythmias. Our results suggest that predictive motif discovery may be able to identify clinically relevant information even in the absence of significant prior knowledge.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "2", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "data mining; Gibbs sampling; inference; knowledge discovery; motifs; physiological signals", } @Article{Webb:2010:SSI, author = "Geoffrey I. Webb", title = "Self-sufficient itemsets: an approach to screening potentially interesting associations between items", journal = j-TKDD, volume = "4", number = "1", pages = "3:1--3:??", month = jan, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1644873.1644876", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Tue Mar 16 18:37:37 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Self-sufficient itemsets are those whose frequency cannot be explained solely by the frequency of either their subsets or of their supersets. We argue that itemsets that are not self-sufficient will often be of little interest to the data analyst, as their frequency should be expected once that of the itemsets on which their frequency depends is known. We present tests for statistically sound discovery of self-sufficient itemsets, and computational techniques that allow those tests to be applied as a post-processing step for any itemset discovery algorithm. We also present a measure for assessing the degree of potential interest in an itemset that complements these statistical measures.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "3", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", keywords = "Association discovery; association rules; itemset discovery; itemset screening; statistical evaluation", } @Article{Plantevit:2010:MMM, author = "Marc Plantevit and Anne Laurent and Dominique Laurent and Maguelonne Teisseire and Yeow Wei Choong", title = "Mining multidimensional and multilevel sequential patterns", journal = j-TKDD, volume = "4", number = "1", pages = "4:1--4:??", month = jan, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1644873.1644877", ISSN = "1556-4681 (print), 1556-472X (electronic)", bibdate = "Tue Mar 16 18:37:37 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Multidimensional databases have been designed to provide decision makers with the necessary tools to help them understand their data. This framework is different from transactional data as the datasets contain huge volumes of historicized and aggregated data defined over a set of dimensions that can be arranged through multiple levels of granularities. Many tools have been proposed to query the data and navigate through the levels of granularity. However, automatic tools are still missing to mine this type of data in order to discover regular specific patterns. In this article, we present a method for mining sequential patterns from multidimensional databases, at the same time taking advantage of the different dimensions and levels of granularity, which is original compared to existing work. The necessary definitions and algorithms are extended from regular sequential patterns to this particular case. Experiments are reported, showing the significance of this approach.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data",
  articleno = "4",
  fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL = "https://dl.acm.org/loi/tkdd",
  keywords = "frequent patterns; hierarchy; multidimensional databases; multilevel patterns; Sequential patterns",
}

@Article{Zaki:2010:VVO,
  author = "Mohammed J. Zaki and Christopher D. Carothers and Boleslaw K. Szymanski",
  title = "{VOGUE}: a variable order hidden {Markov} model with duration based on frequent sequence mining",
  journal = j-TKDD,
  volume = "4",
  number = "1",
  pages = "5:1--5:??",
  month = jan,
  year = "2010",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/1644873.1644878",
  ISSN = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L = "1556-4681",
  bibdate = "Tue Mar 16 18:37:37 MDT 2010",
  bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract = "We present VOGUE, a novel, variable order hidden Markov model with state durations, that combines two separate techniques for modeling complex patterns in sequential data: pattern mining and data modeling. VOGUE relies on a variable gap sequence mining method to extract frequent patterns with different lengths and gaps between elements. It then uses these mined sequences to build a variable order hidden Markov model (HMM), that explicitly models the gaps. The gaps implicitly model the order of the HMM, and they explicitly model the duration of each state. We apply VOGUE to a variety of real sequence data taken from domains such as protein sequence classification, Web usage logs, intrusion detection, and spelling correction. We show that VOGUE has superior classification accuracy compared to regular HMMs, higher-order HMMs, and even special purpose HMMs like HMMER, which is a state-of-the-art method for protein classification. The VOGUE implementation and the datasets used in this article are available as open-source.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Knowl. Discov.
Data",
  articleno = "5",
  fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL = "https://dl.acm.org/loi/tkdd",
  keywords = "Hidden Markov models; higher-order HMM; HMM with duration; sequence mining and modeling; variable-order HMM",
}

@Article{Vadera:2010:CCS,
  author = "Sunil Vadera",
  title = "{CSNL}: a cost-sensitive non-linear decision tree algorithm",
  journal = j-TKDD,
  volume = "4",
  number = "2",
  pages = "6:1--6:??",
  month = may,
  year = "2010",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/1754428.1754429",
  ISSN = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L = "1556-4681",
  bibdate = "Sat Aug 14 17:12:30 MDT 2010",
  bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract = "This article presents a new decision tree learning algorithm called CSNL that induces Cost-Sensitive Non-Linear decision trees. The algorithm is based on the hypothesis that nonlinear decision nodes provide a better basis than axis-parallel decision nodes and utilizes discriminant analysis to construct nonlinear decision trees that take account of costs of misclassification.\par The performance of the algorithm is evaluated by applying it to seventeen datasets and the results are compared with those obtained by two well known cost-sensitive algorithms, ICET and MetaCost, which generate multiple trees to obtain some of the best results to date. The results show that CSNL performs at least as well, if not better than these algorithms, in more than twelve of the datasets and is considerably faster. The use of bagging with CSNL further enhances its performance showing the significant benefits of using nonlinear decision nodes.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Knowl. Discov.
Data",
  articleno = "6",
  fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL = "https://dl.acm.org/loi/tkdd",
  keywords = "cost-sensitive learning; Decision tree learning",
}

@Article{Kandylas:2010:AKC,
  author = "Vasileios Kandylas and S. Phineas Upham and Lyle H. Ungar",
  title = "Analyzing knowledge communities using foreground and background clusters",
  journal = j-TKDD,
  volume = "4",
  number = "2",
  pages = "7:1--7:??",
  month = may,
  year = "2010",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/1754428.1754430",
  ISSN = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L = "1556-4681",
  bibdate = "Sat Aug 14 17:12:30 MDT 2010",
  bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract = "Insight into the growth (or shrinkage) of ``knowledge communities'' of authors that build on each other's work can be gained by studying the evolution over time of clusters of documents. We cluster documents based on the documents they cite in common using the Streemer clustering method, which finds cohesive foreground clusters (the knowledge communities) embedded in a diffuse background. We build predictive models with features based on the citation structure, the vocabulary of the papers, and the affiliations and prestige of the authors and use these models to study the drivers of community growth and the predictors of how widely a paper will be cited. We find that scientific knowledge communities tend to grow more rapidly if their publications build on diverse information and use narrow vocabulary and that papers that lie on the periphery of a community have the highest impact, while those not in any community have the lowest impact.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Knowl. Discov.
Data",
  articleno = "7",
  fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL = "https://dl.acm.org/loi/tkdd",
  keywords = "citation analysis; clustering; community evolution; knowledge communities; Text mining",
}

@Article{Ji:2010:SSL,
  author = "Shuiwang Ji and Lei Tang and Shipeng Yu and Jieping Ye",
  title = "A shared-subspace learning framework for multi-label classification",
  journal = j-TKDD,
  volume = "4",
  number = "2",
  pages = "8:1--8:??",
  month = may,
  year = "2010",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/1754428.1754431",
  ISSN = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L = "1556-4681",
  bibdate = "Sat Aug 14 17:12:30 MDT 2010",
  bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract = "Multi-label problems arise in various domains such as multi-topic document categorization, protein function prediction, and automatic image annotation. One natural way to deal with such problems is to construct a binary classifier for each label, resulting in a set of independent binary classification problems. Since multiple labels share the same input space, and the semantics conveyed by different labels are usually correlated, it is essential to exploit the correlation information contained in different labels. In this paper, we consider a general framework for extracting shared structures in multi-label classification. In this framework, a common subspace is assumed to be shared among multiple labels. We show that the optimal solution to the proposed formulation can be obtained by solving a generalized eigenvalue problem, though the problem is nonconvex. For high-dimensional problems, direct computation of the solution is expensive, and we develop an efficient algorithm for this case. One appealing feature of the proposed framework is that it includes several well-known algorithms as special cases, thus elucidating their intrinsic relationships.
We further show that the proposed framework can be extended to the kernel-induced feature space. We have conducted extensive experiments on multi-topic web page categorization and automatic gene expression pattern image annotation tasks, and results demonstrate the effectiveness of the proposed formulation in comparison with several representative algorithms.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Knowl. Discov. Data",
  articleno = "8",
  fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL = "https://dl.acm.org/loi/tkdd",
  keywords = "gene expression pattern image annotation; kernel methods; least squares loss; Multi-label classification; shared subspace; singular value decomposition; web page categorization",
}

@Article{Ruggieri:2010:DMD,
  author = "Salvatore Ruggieri and Dino Pedreschi and Franco Turini",
  title = "Data mining for discrimination discovery",
  journal = j-TKDD,
  volume = "4",
  number = "2",
  pages = "9:1--9:??",
  month = may,
  year = "2010",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/1754428.1754432",
  ISSN = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L = "1556-4681",
  bibdate = "Sat Aug 14 17:12:30 MDT 2010",
  bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract = "In the context of civil rights law, discrimination refers to unfair or unequal treatment of people based on membership to a category or a minority, without regard to individual merit. Discrimination in credit, mortgage, insurance, labor market, and education has been investigated by researchers in economics and human sciences. With the advent of automatic decision support systems, such as credit scoring systems, the ease of data collection opens several challenges to data analysts for the fight against discrimination. In this article, we introduce the problem of discovering discrimination through data mining in a dataset of historical decision records, taken by humans or by automatic systems.
We formalize the processes of direct and indirect discrimination discovery by modelling protected-by-law groups and contexts where discrimination occurs in a classification rule based syntax. Basically, classification rules extracted from the dataset allow for unveiling contexts of unlawful discrimination, where the degree of burden over protected-by-law groups is formalized by an extension of the lift measure of a classification rule. In direct discrimination, the extracted rules can be directly mined in search of discriminatory contexts. In indirect discrimination, the mining process needs some background knowledge as a further input, for example, census data, that combined with the extracted rules might allow for unveiling contexts of discriminatory decisions. A strategy adopted for combining extracted classification rules with background knowledge is called an inference model. In this article, we propose two inference models and provide automatic procedures for their implementation. An empirical assessment of our results is provided on the German credit dataset and on the PKDD Discovery Challenge 1999 financial dataset.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Knowl. Discov. Data",
  articleno = "9",
  fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL = "https://dl.acm.org/loi/tkdd",
  keywords = "classification rules; Discrimination",
}

@Article{Thomas:2010:MMF,
  author = "Lini T. Thomas and Satyanarayana R. Valluri and Kamalakar Karlapalem",
  title = "{MARGIN}: {Maximal} frequent subgraph mining",
  journal = j-TKDD,
  volume = "4",
  number = "3",
  pages = "10:1--10:??",
  month = oct,
  year = "2010",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/1839490.1839491",
  ISSN = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L = "1556-4681",
  bibdate = "Mon Mar 28 11:43:57 MDT 2011",
  bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Knowl. Discov.
Data",
  articleno = "10",
  fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL = "https://dl.acm.org/loi/tkdd",
}

@Article{Deodhar:2010:SFS,
  author = "Meghana Deodhar and Joydeep Ghosh",
  title = "{SCOAL}: a framework for simultaneous co-clustering and learning from complex data",
  journal = j-TKDD,
  volume = "4",
  number = "3",
  pages = "11:1--11:??",
  month = oct,
  year = "2010",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/1839490.1839492",
  ISSN = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L = "1556-4681",
  bibdate = "Mon Mar 28 11:43:57 MDT 2011",
  bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Knowl. Discov. Data",
  articleno = "11",
  fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL = "https://dl.acm.org/loi/tkdd",
}

@Article{Chen:2010:BBI,
  author = "Jinlin Chen and Keli Xiao",
  title = "{BISC}: a bitmap itemset support counting approach for efficient frequent itemset mining",
  journal = j-TKDD,
  volume = "4",
  number = "3",
  pages = "12:1--12:??",
  month = oct,
  year = "2010",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/1839490.1839493",
  ISSN = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L = "1556-4681",
  bibdate = "Mon Mar 28 11:43:57 MDT 2011",
  bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Knowl. Discov.
Data",
  articleno = "12",
  fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL = "https://dl.acm.org/loi/tkdd",
}

@Article{Becchetti:2010:EAL,
  author = "Luca Becchetti and Paolo Boldi and Carlos Castillo and Aristides Gionis",
  title = "Efficient algorithms for large-scale local triangle counting",
  journal = j-TKDD,
  volume = "4",
  number = "3",
  pages = "13:1--13:??",
  month = oct,
  year = "2010",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/1839490.1839494",
  ISSN = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L = "1556-4681",
  bibdate = "Mon Mar 28 11:43:57 MDT 2011",
  bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Knowl. Discov. Data",
  articleno = "13",
  fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL = "https://dl.acm.org/loi/tkdd",
}

@Article{Zhang:2010:MDR,
  author = "Yin Zhang and Zhi-Hua Zhou",
  title = "Multilabel dimensionality reduction via dependence maximization",
  journal = j-TKDD,
  volume = "4",
  number = "3",
  pages = "14:1--14:??",
  month = oct,
  year = "2010",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/1839490.1839495",
  ISSN = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L = "1556-4681",
  bibdate = "Mon Mar 28 11:43:57 MDT 2011",
  bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Knowl. Discov. Data",
  articleno = "14",
  fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL = "https://dl.acm.org/loi/tkdd",
}

@Article{Cui:2010:LMN,
  author = "Ying Cui and Xiaoli Z. Fern and Jennifer G.
Dy",
  title = "Learning multiple nonredundant clusterings",
  journal = j-TKDD,
  volume = "4",
  number = "3",
  pages = "15:1--15:??",
  month = oct,
  year = "2010",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/1839490.1839496",
  ISSN = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L = "1556-4681",
  bibdate = "Mon Mar 28 11:43:57 MDT 2011",
  bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Knowl. Discov. Data",
  articleno = "15",
  fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL = "https://dl.acm.org/loi/tkdd",
}

@Article{Wang:2010:TSI,
  author = "Wei Wang",
  title = "{TKDD} Special Issue: {SIGKDD 2009}",
  journal = j-TKDD,
  volume = "4",
  number = "4",
  pages = "16:1--16:??",
  month = oct,
  year = "2010",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/1857947.1857948",
  ISSN = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L = "1556-4681",
  bibdate = "Mon Mar 28 11:43:58 MDT 2011",
  bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Knowl. Discov. Data",
  articleno = "16",
  fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL = "https://dl.acm.org/loi/tkdd",
}

@Article{Chen:2010:BTA,
  author = "Ye Chen and Dmitry Pavlov and John F. Canny",
  title = "Behavioral Targeting: The Art of Scaling Up Simple Algorithms",
  journal = j-TKDD,
  volume = "4",
  number = "4",
  pages = "17:1--17:??",
  month = oct,
  year = "2010",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/1857947.1857949",
  ISSN = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L = "1556-4681",
  bibdate = "Mon Mar 28 11:43:58 MDT 2011",
  bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Knowl. Discov.
Data",
  articleno = "17",
  fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL = "https://dl.acm.org/loi/tkdd",
}

@Article{Mohammed:2010:CDA,
  author = "Noman Mohammed and Benjamin C. M. Fung and Patrick C. K. Hung and Cheuk-Kwong Lee",
  title = "Centralized and Distributed Anonymization for High-Dimensional Healthcare Data",
  journal = j-TKDD,
  volume = "4",
  number = "4",
  pages = "18:1--18:??",
  month = oct,
  year = "2010",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/1857947.1857950",
  ISSN = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L = "1556-4681",
  bibdate = "Mon Mar 28 11:43:58 MDT 2011",
  bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Knowl. Discov. Data",
  articleno = "18",
  fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL = "https://dl.acm.org/loi/tkdd",
}

@Article{Liu:2010:BBM,
  author = "Chao Liu and Fan Guo and Christos Faloutsos",
  title = "{Bayesian} Browsing Model: Exact Inference of Document Relevance from Petabyte-Scale Data",
  journal = j-TKDD,
  volume = "4",
  number = "4",
  pages = "19:1--19:??",
  month = oct,
  year = "2010",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/1857947.1857951",
  ISSN = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L = "1556-4681",
  bibdate = "Mon Mar 28 11:43:58 MDT 2011",
  bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Knowl. Discov.
Data",
  articleno = "19",
  fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL = "https://dl.acm.org/loi/tkdd",
}

@Article{Wu:2010:MAF,
  author = "Mingxi Wu and Chris Jermaine and Sanjay Ranka and Xiuyao Song and John Gums",
  title = "A Model-Agnostic Framework for Fast Spatial Anomaly Detection",
  journal = j-TKDD,
  volume = "4",
  number = "4",
  pages = "20:1--20:??",
  month = oct,
  year = "2010",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/1857947.1857952",
  ISSN = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L = "1556-4681",
  bibdate = "Mon Mar 28 11:43:58 MDT 2011",
  bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Knowl. Discov. Data",
  articleno = "20",
  fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL = "https://dl.acm.org/loi/tkdd",
}

@Article{Zhong:2010:ATS,
  author = "Ning Zhong and Gregory Piatetsky-Shapiro and Yiyu Yao and Philip S. Yu",
  title = "{ACM TKDD} Special Issue on Knowledge Discovery for {Web} Intelligence",
  journal = j-TKDD,
  volume = "5",
  number = "1",
  pages = "1:1--1:??",
  month = dec,
  year = "2010",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/1870096.1870097",
  ISSN = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L = "1556-4681",
  bibdate = "Mon Mar 28 11:43:59 MDT 2011",
  bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Knowl. Discov.
Data",
  articleno = "1",
  fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL = "https://dl.acm.org/loi/tkdd",
}

@Article{Tang:2010:CAW,
  author = "Jie Tang and Limin Yao and Duo Zhang and Jing Zhang",
  title = "A Combination Approach to {Web} User Profiling",
  journal = j-TKDD,
  volume = "5",
  number = "1",
  pages = "2:1--2:??",
  month = dec,
  year = "2010",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/1870096.1870098",
  ISSN = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L = "1556-4681",
  bibdate = "Mon Mar 28 11:43:59 MDT 2011",
  bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Knowl. Discov. Data",
  articleno = "2",
  fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL = "https://dl.acm.org/loi/tkdd",
}

@Article{Bouguessa:2010:DKS,
  author = "Mohamed Bouguessa and Shengrui Wang and Benoit Dumoulin",
  title = "Discovering Knowledge-Sharing Communities in Question-Answering Forums",
  journal = j-TKDD,
  volume = "5",
  number = "1",
  pages = "3:1--3:??",
  month = dec,
  year = "2010",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/1870096.1870099",
  ISSN = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L = "1556-4681",
  bibdate = "Mon Mar 28 11:43:59 MDT 2011",
  bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Knowl. Discov.
Data",
  articleno = "3",
  fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL = "https://dl.acm.org/loi/tkdd",
}

@Article{Plangprasopchok:2010:MSA,
  author = "Anon Plangprasopchok and Kristina Lerman",
  title = "Modeling Social Annotation: a {Bayesian} Approach",
  journal = j-TKDD,
  volume = "5",
  number = "1",
  pages = "4:1--4:??",
  month = dec,
  year = "2010",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/1870096.1870100",
  ISSN = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L = "1556-4681",
  bibdate = "Mon Mar 28 11:43:59 MDT 2011",
  bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Knowl. Discov. Data",
  articleno = "4",
  fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL = "https://dl.acm.org/loi/tkdd",
}

@Article{Sakurai:2010:FDG,
  author = "Yasushi Sakurai and Christos Faloutsos and Spiros Papadimitriou",
  title = "Fast Discovery of Group Lag Correlations in Streams",
  journal = j-TKDD,
  volume = "5",
  number = "1",
  pages = "5:1--5:??",
  month = dec,
  year = "2010",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/1870096.1870101",
  ISSN = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L = "1556-4681",
  bibdate = "Mon Mar 28 11:43:59 MDT 2011",
  bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Knowl. Discov.
Data",
  articleno = "5",
  fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL = "https://dl.acm.org/loi/tkdd",
}

@Article{Liu:2010:FCP,
  author = "Kun Liu and Evimaria Terzi",
  title = "A Framework for Computing the Privacy Scores of Users in Online Social Networks",
  journal = j-TKDD,
  volume = "5",
  number = "1",
  pages = "6:1--6:??",
  month = dec,
  year = "2010",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/1870096.1870102",
  ISSN = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L = "1556-4681",
  bibdate = "Mon Mar 28 11:43:59 MDT 2011",
  bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Knowl. Discov. Data",
  articleno = "6",
  fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL = "https://dl.acm.org/loi/tkdd",
}

@Article{Sun:2011:ISI,
  author = "Jimeng Sun and Yan Liu and Jie Tang and Chid Apte",
  title = "Introduction to Special Issue on Large-Scale Data Mining",
  journal = j-TKDD,
  volume = "5",
  number = "2",
  pages = "7:1--7:??",
  month = feb,
  year = "2011",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/1921632.1921633",
  ISSN = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L = "1556-4681",
  bibdate = "Mon Mar 28 11:44:01 MDT 2011",
  bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Knowl. Discov. Data",
  articleno = "7",
  fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL = "https://dl.acm.org/loi/tkdd",
}

@Article{Kang:2011:HMR,
  author = "U. Kang and Charalampos E.
Tsourakakis and Ana Paula Appel and Christos Faloutsos and Jure Leskovec",
  title = "{HADI}: Mining Radii of Large Graphs",
  journal = j-TKDD,
  volume = "5",
  number = "2",
  pages = "8:1--8:??",
  month = feb,
  year = "2011",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/1921632.1921634",
  ISSN = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L = "1556-4681",
  bibdate = "Mon Mar 28 11:44:01 MDT 2011",
  bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Knowl. Discov. Data",
  articleno = "8",
  fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL = "https://dl.acm.org/loi/tkdd",
}

@Article{deVries:2011:RRL,
  author = "Timothy de Vries and Hui Ke and Sanjay Chawla and Peter Christen",
  title = "Robust Record Linkage Blocking Using Suffix Arrays and {Bloom} Filters",
  journal = j-TKDD,
  volume = "5",
  number = "2",
  pages = "9:1--9:??",
  month = feb,
  year = "2011",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/1921632.1921635",
  ISSN = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L = "1556-4681",
  bibdate = "Mon Mar 28 11:44:01 MDT 2011",
  bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Knowl. Discov. Data",
  articleno = "9",
  fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL = "https://dl.acm.org/loi/tkdd",
}

@Article{Dunlavy:2011:TLP,
  author = "Daniel M. Dunlavy and Tamara G.
Kolda and Evrim Acar",
  title = "Temporal Link Prediction Using Matrix and Tensor Factorizations",
  journal = j-TKDD,
  volume = "5",
  number = "2",
  pages = "10:1--10:??",
  month = feb,
  year = "2011",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/1921632.1921636",
  ISSN = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L = "1556-4681",
  bibdate = "Mon Mar 28 11:44:01 MDT 2011",
  bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Knowl. Discov. Data",
  articleno = "10",
  fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL = "https://dl.acm.org/loi/tkdd",
}

@Article{Magdalinos:2011:ECQ,
  author = "Panagis Magdalinos and Christos Doulkeridis and Michalis Vazirgiannis",
  title = "Enhancing Clustering Quality through Landmark-Based Dimensionality Reduction",
  journal = j-TKDD,
  volume = "5",
  number = "2",
  pages = "11:1--11:??",
  month = feb,
  year = "2011",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/1921632.1921637",
  ISSN = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L = "1556-4681",
  bibdate = "Mon Mar 28 11:44:01 MDT 2011",
  bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Knowl. Discov.
Data",
  articleno = "11",
  fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL = "https://dl.acm.org/loi/tkdd",
}

@Article{Cheng:2011:CLA,
  author = "Hong Cheng and Yang Zhou and Jeffrey Xu Yu",
  title = "Clustering Large Attributed Graphs: a Balance between Structural and Attribute Similarities",
  journal = j-TKDD,
  volume = "5",
  number = "2",
  pages = "12:1--12:??",
  month = feb,
  year = "2011",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/1921632.1921638",
  ISSN = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L = "1556-4681",
  bibdate = "Mon Mar 28 11:44:01 MDT 2011",
  bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Knowl. Discov. Data",
  articleno = "12",
  fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL = "https://dl.acm.org/loi/tkdd",
}

@Article{Menon:2011:FAA,
  author = "Aditya Krishna Menon and Charles Elkan",
  title = "Fast Algorithms for Approximating the Singular Value Decomposition",
  journal = j-TKDD,
  volume = "5",
  number = "2",
  pages = "13:1--13:??",
  month = feb,
  year = "2011",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/1921632.1921639",
  ISSN = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L = "1556-4681",
  bibdate = "Mon Mar 28 11:44:01 MDT 2011",
  bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract = "A low-rank approximation to a matrix $A$ is a matrix with significantly smaller rank than $A$, and which is close to $A$ according to some norm. Many practical applications involving the use of large matrices focus on low-rank approximations. By reducing the rank or dimensionality of the data, we reduce the complexity of analyzing the data. The singular value decomposition is the most popular low-rank matrix approximation.
However, due to its expensive computational requirements, it has often been considered intractable for practical applications involving massive data. Recent developments have tried to address this problem, with several methods proposed to approximate the decomposition with better asymptotic runtime. We present an empirical study of these techniques on a variety of dense and sparse datasets. We find that a sampling approach of Drineas, Kannan and Mahoney is often, but not always, the best performing method. This method gives solutions with high accuracy much faster than classical SVD algorithms, on large sparse datasets in particular. Other modern methods, such as a recent algorithm by Rokhlin and Tygert, also offer savings compared to classical SVD algorithms. The older sampling methods of Achlioptas and McSherry are shown to sometimes take longer than classical SVD.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Knowl. Discov. Data",
  articleno = "13",
  fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL = "https://dl.acm.org/loi/tkdd",
}

@Article{Wang:2011:IDC,
  author = "Dingding Wang and Shenghuo Zhu and Tao Li and Yun Chi and Yihong Gong",
  title = "Integrating Document Clustering and Multidocument Summarization",
  journal = j-TKDD,
  volume = "5",
  number = "3",
  pages = "14:1--14:??",
  month = aug,
  year = "2011",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/1993077.1993078",
  ISSN = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L = "1556-4681",
  bibdate = "Thu Aug 18 13:28:08 MDT 2011",
  bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Knowl. Discov.
Data",
  articleno = "14",
  fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL = "https://dl.acm.org/loi/tkdd",
}

@Article{Maier:2011:INS,
  author = "Marc Maier and Matthew Rattigan and David Jensen",
  title = "Indexing Network Structure with Shortest-Path Trees",
  journal = j-TKDD,
  volume = "5",
  number = "3",
  pages = "15:1--15:??",
  month = aug,
  year = "2011",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/1993077.1993079",
  ISSN = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L = "1556-4681",
  bibdate = "Thu Aug 18 13:28:08 MDT 2011",
  bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Knowl. Discov. Data",
  articleno = "15",
  fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL = "https://dl.acm.org/loi/tkdd",
}

@Article{Wong:2011:CUA,
  author = "Raymond Chi-Wing Wong and Ada Wai-Chee Fu and Ke Wang and Philip S. Yu and Jian Pei",
  title = "Can the Utility of Anonymized Data be Used for Privacy Breaches?",
  journal = j-TKDD,
  volume = "5",
  number = "3",
  pages = "16:1--16:??",
  month = aug,
  year = "2011",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/1993077.1993080",
  ISSN = "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L = "1556-4681",
  bibdate = "Thu Aug 18 13:28:08 MDT 2011",
  bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Knowl. Discov.
Data", articleno = "16", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Lin:2011:CDM, author = "Yu-Ru Lin and Jimeng Sun and Hari Sundaram and Aisling Kelliher and Paul Castro and Ravi Konuru", title = "Community Discovery via Metagraph Factorization", journal = j-TKDD, volume = "5", number = "3", pages = "17:1--17:??", month = aug, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/1993077.1993081", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Aug 18 13:28:08 MDT 2011", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "17", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Elkan:2012:GES, author = "Charles Elkan and Yehuda Koren", title = "Guest Editorial for Special Issue {KDD'10}", journal = j-TKDD, volume = "5", number = "4", pages = "18:1--18:??", month = feb, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2086737.2086738", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Mar 16 15:19:57 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "18", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Iwata:2012:SMT, author = "Tomoharu Iwata and Takeshi Yamada and Yasushi Sakurai and Naonori Ueda", title = "Sequential Modeling of Topic Dynamics with Multiple Timescales", journal = j-TKDD, volume = "5", number = "4", pages = "19:1--19:??", month = feb, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2086737.2086739", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Mar 16 15:19:57 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "We propose an online topic model for sequentially analyzing the time evolution of topics in document collections. Topics naturally evolve with multiple timescales. For example, some words may be used consistently over one hundred years, while other words emerge and disappear over periods of a few days. Thus, in the proposed model, current topic-specific distributions over words are assumed to be generated based on the multiscale word distributions of the previous epoch. Considering both the long- and short-timescale dependency yields a more robust model. We derive efficient online inference procedures based on a stochastic EM algorithm, in which the model is sequentially updated using newly obtained data; this means that past data are not required to make the inference. We demonstrate the effectiveness of the proposed method in terms of predictive performance and computational efficiency by examining collections of real documents with timestamps.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "19", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Huh:2012:DTM, author = "Seungil Huh and Stephen E. 
Fienberg", title = "Discriminative Topic Modeling Based on Manifold Learning", journal = j-TKDD, volume = "5", number = "4", pages = "20:1--20:??", month = feb, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2086737.2086740", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Mar 16 15:19:57 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Topic modeling has become a popular method used for data analysis in various domains including text documents. Previous topic model approaches, such as probabilistic Latent Semantic Analysis (pLSA) and Latent Dirichlet Allocation (LDA), have shown impressive success in discovering low-rank hidden structures for modeling text documents. These approaches, however do not take into account the manifold structure of the data, which is generally informative for nonlinear dimensionality reduction mapping. More recent topic model approaches, Laplacian PLSI (LapPLSI) and Locally-consistent Topic Model (LTM), have incorporated the local manifold structure into topic models and have shown resulting benefits. But they fall short of achieving full discriminating power of manifold learning as they only enhance the proximity between the low-rank representations of neighboring pairs without any consideration for non-neighboring pairs. In this article, we propose a new approach, Discriminative Topic Model (DTM), which separates non-neighboring pairs from each other in addition to bringing neighboring pairs closer together, thereby preserving the global manifold structure as well as improving local consistency. We also present a novel model-fitting algorithm based on the generalized EM algorithm and the concept of Pareto improvement. 
We empirically demonstrate the success of DTM in terms of unsupervised clustering and semisupervised classification accuracies on text corpora and robustness to parameters compared to state-of-the-art techniques.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "20", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Gomez-Rodriguez:2012:IND, author = "Manuel Gomez-Rodriguez and Jure Leskovec and Andreas Krause", title = "Inferring Networks of Diffusion and Influence", journal = j-TKDD, volume = "5", number = "4", pages = "21:1--21:??", month = feb, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2086737.2086741", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Mar 16 15:19:57 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Information diffusion and virus propagation are fundamental processes taking place in networks. While it is often possible to directly observe when nodes become infected with a virus or publish the information, observing individual transmissions (who infects whom, or who influences whom) is typically very difficult. Furthermore, in many applications, the underlying network over which the diffusions and propagations spread is actually unobserved. We tackle these challenges by developing a method for tracing paths of diffusion and influence through networks and inferring the networks over which contagions propagate. Given the times when nodes adopt pieces of information or become infected, we identify the optimal network that best explains the observed infection times. Since the optimization problem is NP-hard to solve exactly, we develop an efficient approximation algorithm that scales to large datasets and finds provably near-optimal networks. 
We demonstrate the effectiveness of our approach by tracing information diffusion in a set of 170 million blogs and news articles over a one year period to infer how information flows through the online media space. We find that the diffusion network of news for the top 1,000 media sites and blogs tends to have a core-periphery structure with a small set of core media sites that diffuse information to the rest of the Web. These sites tend to have stable circles of influence with more general news media sites acting as connectors between them.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "21", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chen:2012:LIS, author = "Jianhui Chen and Ji Liu and Jieping Ye", title = "Learning Incoherent Sparse and Low-Rank Patterns from Multiple Tasks", journal = j-TKDD, volume = "5", number = "4", pages = "22:1--22:??", month = feb, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2086737.2086742", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Mar 16 15:19:57 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "We consider the problem of learning incoherent sparse and low-rank patterns from multiple tasks. Our approach is based on a linear multitask learning formulation, in which the sparse and low-rank patterns are induced by a cardinality regularization term and a low-rank constraint, respectively. This formulation is nonconvex; we convert it into its convex surrogate, which can be routinely solved via semidefinite programming for small-size problems. We propose employing the general projected gradient scheme to efficiently solve such a convex surrogate; however, in the optimization formulation, the objective function is nondifferentiable and the feasible domain is nontrivial. 
We present the procedures for computing the projected gradient and ensuring the global convergence of the projected gradient scheme. The computation of the projected gradient involves a constrained optimization problem; we show that the optimal solution to such a problem can be obtained via solving an unconstrained optimization subproblem and a Euclidean projection subproblem. We also present two projected gradient algorithms and analyze their rates of convergence in detail. In addition, we illustrate the use of the presented projected gradient algorithms for the proposed multitask learning formulation using the least squares loss. Experimental results on a collection of real-world data sets demonstrate the effectiveness of the proposed multitask learning formulation and the efficiency of the proposed projected gradient algorithms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "22", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yu:2012:LLC, author = "Hsiang-Fu Yu and Cho-Jui Hsieh and Kai-Wei Chang and Chih-Jen Lin", title = "Large Linear Classification When Data Cannot Fit in Memory", journal = j-TKDD, volume = "5", number = "4", pages = "23:1--23:??", month = feb, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2086737.2086743", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Mar 16 15:19:57 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Recent advances in linear classification have shown that for applications such as document classification, the training process can be extremely efficient. However, most of the existing training methods are designed by assuming that data can be stored in the computer memory. 
These methods cannot be easily applied to data larger than the memory capacity due to the random access to the disk. We propose and analyze a block minimization framework for data larger than the memory size. At each step a block of data is loaded from the disk and handled by certain learning methods. We investigate two implementations of the proposed framework for primal and dual SVMs, respectively. Because data cannot fit in memory, many design considerations are very different from those for traditional algorithms. We discuss and compare with existing approaches that are able to handle data larger than memory. Experiments using data sets 20 times larger than the memory demonstrate the effectiveness of the proposed method.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "23", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Shahaf:2012:CTL, author = "Dafna Shahaf and Carlos Guestrin", title = "Connecting Two (or Less) Dots: Discovering Structure in News Articles", journal = j-TKDD, volume = "5", number = "4", pages = "24:1--24:??", month = feb, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2086737.2086744", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Mar 16 15:19:57 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Finding information is becoming a major part of our daily life. Entire sectors, from Web users to scientists and intelligence analysts, are increasingly struggling to keep up with the larger and larger amounts of content published every day. With this much data, it is often easy to miss the big picture. In this article, we investigate methods for automatically connecting the dots---providing a structured, easy way to navigate within a new topic and discover hidden connections. 
We focus on the news domain: given two news articles, our system automatically finds a coherent chain linking them together. For example, it can recover the chain of events starting with the decline of home prices (January 2007), and ending with the health care debate (2009). We formalize the characteristics of a good chain and provide a fast search-driven algorithm to connect two fixed endpoints. We incorporate user feedback into our framework, allowing the stories to be refined and personalized. We also provide a method to handle partially-specified endpoints, for users who do not know both ends of a story. Finally, we evaluate our algorithm over real news data. Our user studies demonstrate that the objective we propose captures the users' intuitive notion of coherence, and that our algorithm effectively helps users understand the news.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "24", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ienco:2012:CDL, author = "Dino Ienco and Ruggero G. Pensa and Rosa Meo", title = "From Context to Distance: Learning Dissimilarity for Categorical Data Clustering", journal = j-TKDD, volume = "6", number = "1", pages = "1:1--1:??", month = mar, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2133360.2133361", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Nov 6 18:30:38 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Clustering data described by categorical attributes is a challenging task in data mining applications. Unlike numerical attributes, it is difficult to define a distance between pairs of values of a categorical attribute, since the values are not ordered. In this article, we propose a framework to learn a context-based distance for categorical attributes. 
The key intuition of this work is that the distance between two values of a categorical attribute $A_i$ can be determined by the way in which the values of the other attributes $A_j$ are distributed in the dataset objects: if they are similarly distributed in the groups of objects in correspondence of the distinct values of $A_i$ a low value of distance is obtained. We propose also a solution to the critical point of the choice of the attributes $A_j$. We validate our approach by embedding our distance learning framework in a hierarchical clustering algorithm. We applied it on various real world and synthetic datasets, both low and high-dimensional. Experimental results show that our method is competitive with respect to the state of the art of categorical data clustering approaches. We also show that our approach is scalable and has a low impact on the overall computational time of a clustering task.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "1", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2012:EMG, author = "Chun Li and Qingyan Yang and Jianyong Wang and Ming Li", title = "Efficient Mining of Gap-Constrained Subsequences and Its Various Applications", journal = j-TKDD, volume = "6", number = "1", pages = "2:1--2:??", month = mar, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2133360.2133362", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Nov 6 18:30:38 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Mining frequent subsequence patterns is a typical data-mining problem and various efficient sequential pattern mining algorithms have been proposed. 
In many application domains (e.g., biology), the frequent subsequences confined by the predefined gap requirements are more meaningful than the general sequential patterns. In this article, we propose two algorithms, Gap-BIDE for mining closed gap-constrained subsequences from a set of input sequences, and Gap-Connect for mining repetitive gap-constrained subsequences from a single input sequence. Inspired by some state-of-the-art closed or constrained sequential pattern mining algorithms, the Gap-BIDE algorithm adopts an efficient approach to finding the complete set of closed sequential patterns with gap constraints, while the Gap-Connect algorithm efficiently mines an approximate set of long patterns by connecting short patterns. We also present several methods for feature selection from the set of gap-constrained patterns for the purpose of classification and clustering. Our extensive performance study shows that our approaches are very efficient in mining frequent subsequences with gap constraints, and the gap-constrained pattern based classification/clustering approaches can achieve high-quality results.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "2", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2012:IBA, author = "Fei Tony Liu and Kai Ming Ting and Zhi-Hua Zhou", title = "Isolation-Based Anomaly Detection", journal = j-TKDD, volume = "6", number = "1", pages = "3:1--3:??", month = mar, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2133360.2133363", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Nov 6 18:30:38 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Anomalies are data points that are few and different. 
As a result of these properties, we show that, anomalies are susceptible to a mechanism called isolation. This article proposes a method called Isolation Forest ($i$Forest), which detects anomalies purely based on the concept of isolation without employing any distance or density measure---fundamentally different from all existing methods. As a result, $i$Forest is able to exploit subsampling (i) to achieve a low linear time-complexity and a small memory-requirement and (ii) to deal with the effects of swamping and masking effectively. Our empirical evaluation shows that $i$Forest outperforms ORCA, one-class SVM, LOF and Random Forests in terms of AUC, processing time, and it is robust against masking and swamping effects. $i$Forest also works well in high dimensional problems containing a large number of irrelevant attributes, and when anomalies are not available in training sample.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "3", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Jin:2012:MML, author = "Yu Jin and Nick Duffield and Jeffrey Erman and Patrick Haffner and Subhabrata Sen and Zhi-Li Zhang", title = "A Modular Machine Learning System for Flow-Level Traffic Classification in Large Networks", journal = j-TKDD, volume = "6", number = "1", pages = "4:1--4:??", month = mar, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2133360.2133364", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Nov 6 18:30:38 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "The ability to accurately and scalably classify network traffic is of critical importance to a wide range of management tasks of large networks, such as tier-1 ISP networks and global enterprise networks. 
Guided by the practical constraints and requirements of traffic classification in large networks, in this article, we explore the design of an accurate and scalable machine learning based flow-level traffic classification system, which is trained on a dataset of flow-level data that has been annotated with application protocol labels by a packet-level classifier. Our system employs a lightweight modular architecture, which combines a series of simple linear binary classifiers, each of which can be efficiently implemented and trained on vast amounts of flow data in parallel, and embraces three key innovative mechanisms, weighted threshold sampling, logistic calibration, and intelligent data partitioning, to achieve scalability while attaining high accuracy. Evaluations using real traffic data from multiple locations in a large ISP show that our system accurately reproduces the labels of the packet level classifier when runs on (unlabeled) flow records, while meeting the scalability and stability requirements of large ISP networks. Using training and test datasets that are two months apart and collected from two different locations, the flow error rates are only 3\% for TCP flows and 0.4\% for UDP flows. We further show that such error rates can be reduced by combining the information of spatial distributions of flows, or collective traffic statistics, during classification. We propose a novel two-step model, which seamlessly integrates these collective traffic statistics into the existing traffic classification system. Experimental results display performance improvement on all traffic classes and an overall error rate reduction by 15\%. In addition to a high accuracy, at runtime, our implementation easily scales to classify traffic on 10Gbps links.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "4", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Mavroeidis:2012:SSF, author = "Dimitrios Mavroeidis and Panagis Magdalinos", title = "A Sequential Sampling Framework for Spectral $k$-Means Based on Efficient Bootstrap Accuracy Estimations: Application to Distributed Clustering", journal = j-TKDD, volume = "6", number = "2", pages = "5:1--5:??", month = jul, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2297456.2297457", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Nov 6 18:30:38 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "The scalability of learning algorithms has always been a central concern for data mining researchers, and nowadays, with the rapid increase in data storage capacities and availability, its importance has increased. To this end, sampling has been studied by several researchers in an effort to derive sufficiently accurate models using only small data fractions. In this article we focus on spectral $k$-means, that is, the $k$-means approximation as derived by the spectral relaxation, and propose a sequential sampling framework that iteratively enlarges the sample size until the $k$-means results (objective function and cluster structure) become indistinguishable from the asymptotic (infinite-data) output. In the proposed framework we adopt a commonly applied principle in data mining research that considers the use of minimal assumptions concerning the data generating distribution. This restriction imposes several challenges, mainly related to the efficiency of the sequential sampling procedure. These challenges are addressed using elements of matrix perturbation theory and statistics. 
Moreover, although the main focus is on spectral $k$-means, we also demonstrate that the proposed framework can be generalized to handle spectral clustering. The proposed sequential sampling framework is consecutively employed for addressing the distributed clustering problem, where the task is to construct a global model for data that resides in distributed network nodes. The main challenge in this context is related to the bandwidth constraints that are commonly imposed, thus requiring that the distributed clustering algorithm consumes a minimal amount of network load. This illustrates the applicability of the proposed approach, as it enables the determination of a minimal sample size that can be used for constructing an accurate clustering model that entails the distributional characteristics of the data. As opposed to the relevant distributed $k$-means approaches, our framework takes into account the fact that the choice of the number of clusters has a crucial effect on the required amount of communication. More precisely, the proposed algorithm is able to derive a statistical estimation of the required relative sizes for all possible values of $k$. This unique feature of our distributed clustering framework enables a network administrator to choose an economic solution that identifies the crude cluster structure of a dataset and not devote excessive network resources for identifying all the ``correct'' detailed clusters.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "5", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Das:2012:MIG, author = "Sanmay Das and Malik Magdon-Ismail", title = "A Model for Information Growth in Collective Wisdom Processes", journal = j-TKDD, volume = "6", number = "2", pages = "6:1--6:??", month = jul, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2297456.2297458", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Nov 6 18:30:38 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Collaborative media such as wikis have become enormously successful venues for information creation. Articles accrue information through the asynchronous editing of users who arrive both seeking information and possibly able to contribute information. Most articles stabilize to high-quality, trusted sources of information representing the collective wisdom of all the users who edited the article. We propose a model for information growth which relies on two main observations: (i) as an article's quality improves, it attracts visitors at a faster rate (a rich-get-richer phenomenon); and, simultaneously, (ii) the chances that a new visitor will improve the article drops (there is only so much that can be said about a particular topic). Our model is able to reproduce many features of the edit dynamics observed on Wikipedia; in particular, it captures the observed rise in the edit rate, followed by $ 1 / t $ decay. Despite differences in the media, we also document similar features in the comment rates for a segment of the LiveJournal blogosphere.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "6", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Xu:2012:GME, author = "Tianbing Xu and Zhongfei Zhang and Philip S. Yu and Bo Long", title = "Generative Models for Evolutionary Clustering", journal = j-TKDD, volume = "6", number = "2", pages = "7:1--7:??", month = jul, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2297456.2297459", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Nov 6 18:30:38 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "This article studies evolutionary clustering, a recently emerged hot topic with many important applications, noticeably in dynamic social network analysis. In this article, based on the recent literature on nonparametric Bayesian models, we have developed two generative models: DPChain and HDP-HTM. DPChain is derived from the Dirichlet process mixture (DPM) model, with an exponential decaying component along with the time. HDP-HTM combines the hierarchical Dirichlet process (HDP) with a hierarchical transition matrix (HTM) based on the proposed Infinite hierarchical Markov state model (iHMS). Both models substantially advance the literature on evolutionary clustering, in the sense that not only do they both perform better than those in the existing literature, but more importantly, they are capable of automatically learning the cluster numbers and explicitly addressing the corresponding issues. Extensive evaluations have demonstrated the effectiveness and the promise of these two solutions compared to the state-of-the-art literature.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "7", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2012:LME, author = "Shaojun Wang and Dale Schuurmans and Yunxin Zhao", title = "The Latent Maximum Entropy Principle", journal = j-TKDD, volume = "6", number = "2", pages = "8:1--8:??", month = jul, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2297456.2297460", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Nov 6 18:30:38 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "We present an extension to Jaynes' maximum entropy principle that incorporates latent variables. The principle of latent maximum entropy we propose is different from both Jaynes' maximum entropy principle and maximum likelihood estimation, but can yield better estimates in the presence of hidden variables and limited training data. We first show that solving for a latent maximum entropy model poses a hard nonlinear constrained optimization problem in general. However, we then show that feasible solutions to this problem can be obtained efficiently for the special case of log-linear models---which forms the basis for an efficient approximation to the latent maximum entropy principle. We derive an algorithm that combines expectation-maximization with iterative scaling to produce feasible log-linear solutions. This algorithm can be interpreted as an alternating minimization algorithm in the information divergence, and reveals an intimate connection between the latent maximum entropy and maximum likelihood principles. To select a final model, we generate a series of feasible candidates, calculate the entropy of each, and choose the model that attains the highest entropy. 
Our experimental results show that estimation based on the latent maximum entropy principle generally gives better results than maximum likelihood when estimating latent variable models on small observed data samples.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "8", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Bhattacharya:2012:CGC, author = "Indrajit Bhattacharya and Shantanu Godbole and Sachindra Joshi and Ashish Verma", title = "Cross-Guided Clustering: Transfer of Relevant Supervision across Tasks", journal = j-TKDD, volume = "6", number = "2", pages = "9:1--9:??", month = jul, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2297456.2297461", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Nov 6 18:30:38 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Lack of supervision in clustering algorithms often leads to clusters that are not useful or interesting to human reviewers. We investigate if supervision can be automatically transferred for clustering a target task, by providing a relevant supervised partitioning of a dataset from a different source task. The target clustering is made more meaningful for the human user by trading-off intrinsic clustering goodness on the target task for alignment with relevant supervised partitions in the source task, wherever possible. We propose a cross-guided clustering algorithm that builds on traditional k-means by aligning the target clusters with source partitions. The alignment process makes use of a cross-task similarity measure that discovers hidden relationships across tasks. 
When the source and target tasks correspond to different domains with potentially different vocabularies, we propose a projection approach using pivot vocabularies for the cross-domain similarity measure. Using multiple real-world and synthetic datasets, we show that our approach improves clustering accuracy significantly over traditional k-means and state-of-the-art semi-supervised clustering baselines, over a wide range of data characteristics and parameter settings.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "9", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2012:LBN, author = "Zhenxing Wang and Laiwan Chan", title = "Learning {Bayesian} networks from {Markov} random fields: an efficient algorithm for linear models", journal = j-TKDD, volume = "6", number = "3", pages = "10:1--10:??", month = oct, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2362383.2362384", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Nov 6 18:30:40 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Dependency analysis is a typical approach for Bayesian network learning, which infers the structures of Bayesian networks by the results of a series of conditional independence (CI) tests. In practice, testing independence conditioning on large sets hampers the performance of dependency analysis algorithms in terms of accuracy and running time for the following reasons. First, testing independence on large sets of variables with limited samples is not stable. Second, for most dependency analysis algorithms, the number of CI tests grows at an exponential rate with the sizes of conditioning sets, and the running time grows of the same rate. 
Therefore, determining how to reduce the number of CI tests and the sizes of conditioning sets becomes a critical step in dependency analysis algorithms. In this article, we address a two-phase algorithm based on the observation that the structures of Markov random fields are similar to those of Bayesian networks. The first phase of the algorithm constructs a Markov random field from data, which provides a close approximation to the structure of the true Bayesian network; the second phase of the algorithm removes redundant edges according to CI tests to get the true Bayesian network. Both phases use Markov blanket information to reduce the sizes of conditioning sets and the number of CI tests without sacrificing accuracy. An empirical study shows that the two-phase algorithm performs well in terms of accuracy and efficiency.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "10", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chan:2012:CID, author = "Jeffrey Chan and James Bailey and Christopher Leckie and Michael Houle", title = "{ciForager}: Incrementally discovering regions of correlated change in evolving graphs", journal = j-TKDD, volume = "6", number = "3", pages = "11:1--11:??", month = oct, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2362383.2362385", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Nov 6 18:30:40 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Data mining techniques for understanding how graphs evolve over time have become increasingly important. Evolving graphs arise naturally in diverse applications such as computer network topologies, multiplayer games and medical imaging. 
A natural and interesting problem in evolving graph analysis is the discovery of compact subgraphs that change in a similar manner. Such subgraphs are known as regions of correlated change and they can both summarise change patterns in graphs and help identify the underlying events causing these changes. However, previous techniques for discovering regions of correlated change suffer from limited scalability, making them unsuitable for analysing the evolution of very large graphs. In this paper, we introduce a new algorithm called ciForager, that addresses this scalability challenge and offers considerable improvements. The efficiency of ciForager is based on the use of new incremental techniques for detecting change, as well as the use of Voronoi representations for efficiently determining distance. We experimentally show that ciForager can achieve speedups of up to 1000 times over previous approaches. As a result, it becomes feasible for the first time to discover regions of correlated change in extremely large graphs, such as the entire BGP routing topology of the Internet.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "11", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2012:CDS, author = "Dingding Wang and Shenghuo Zhu and Tao Li and Yihong Gong", title = "Comparative document summarization via discriminative sentence selection", journal = j-TKDD, volume = "6", number = "3", pages = "12:1--12:??", month = oct, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2362383.2362386", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Nov 6 18:30:40 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Given a collection of document groups, a natural question is to identify the differences among them. 
Although traditional document summarization techniques can summarize the content of the document groups one by one, there exists a great necessity to generate a summary of the differences among the document groups. In this article, we study a novel problem, that of summarizing the differences between document groups. A discriminative sentence selection method is proposed to extract the most discriminative sentences which represent the specific characteristics of each document group. Experiments and case studies on real-world data sets demonstrate the effectiveness of our proposed method.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "12", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{deMelo:2012:FNO, author = "Pedro O. S. {Vaz de Melo} and Virgilio A. F. Almeida and Antonio A. F. Loureiro and Christos Faloutsos", title = "Forecasting in the {NBA} and other team sports: Network effects in action", journal = j-TKDD, volume = "6", number = "3", pages = "13:1--13:??", month = oct, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2362383.2362387", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Nov 6 18:30:40 MST 2012", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "The multi-million sports-betting market is based on the fact that the task of predicting the outcome of a sports event is very hard. Even with the aid of an uncountable number of descriptive statistics and background information, only a few can correctly guess the outcome of a game or a league. In this work, our approach is to move away from the traditional way of predicting sports events, and instead to model sports leagues as networks of players and teams where the only information available is the work relationships among them. 
We propose two network-based models to predict the behavior of teams in sports leagues. These models are parameter-free, that is, they do not have a single parameter, and moreover are sport-agnostic: they can be applied directly to any team sports league. First, we view a sports league as a network in evolution, and we infer the implicit feedback behind network changes and properties over the years. Then, we use this knowledge to construct the network-based prediction models, which can, with a significantly high probability, indicate how well a team will perform over a season. We compare our proposed models with other prediction models in two of the most popular sports leagues: the National Basketball Association (NBA) and the Major League Baseball (MLB). Our model shows consistently good results in comparison with the other models and, relying upon the network properties of the teams, we achieved a $ \approx 14 \% $ rank prediction accuracy improvement over our best competitor.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "13", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ghosh:2012:SIB, author = "Joydeep Ghosh and Padhraic Smyth and Andrew Tomkins and Rich Caruana", title = "Special issue on best of {SIGKDD 2011}", journal = j-TKDD, volume = "6", number = "4", pages = "14:1--14:??", month = dec, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2382577.2382578", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jun 24 13:02:40 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "14", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Kaufman:2012:LDM, author = "Shachar Kaufman and Saharon Rosset and Claudia Perlich and Ori Stitelman", title = "Leakage in data mining: Formulation, detection, and avoidance", journal = j-TKDD, volume = "6", number = "4", pages = "15:1--15:??", month = dec, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2382577.2382579", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jun 24 13:02:40 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Deemed ``one of the top ten data mining mistakes'', leakage is the introduction of information about the data mining target that should not be legitimately available to mine from. In addition to our own industry experience with real-life projects, controversies around several major public data mining competitions held recently such as the INFORMS 2010 Data Mining Challenge and the IJCNN 2011 Social Network Challenge are evidence that this issue is as relevant today as it has ever been. While acknowledging the importance and prevalence of leakage in both synthetic competitions and real-life data mining projects, existing literature has largely left this idea unexplored. What little has been said turns out not to be broad enough to cover more complex cases of leakage, such as those where the classical independently and identically distributed (i.i.d.) assumption is violated, that have been recently documented. In our new approach, these cases and others are explained by explicitly defining modeling goals and analyzing the broader framework of the data mining problem. The resulting definition enables us to derive general methodology for dealing with the issue. 
We show that it is possible to avoid leakage with a simple specific approach to data management followed by what we call a learn-predict separation, and present several ways of detecting leakage when the modeler has no control over how the data have been collected. We also offer an alternative point of view on leakage that is based on causal graph modeling concepts.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "15", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Mampaey:2012:SDS, author = "Michael Mampaey and Jilles Vreeken and Nikolaj Tatti", title = "Summarizing data succinctly with the most informative itemsets", journal = j-TKDD, volume = "6", number = "4", pages = "16:1--16:??", month = dec, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2382577.2382580", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jun 24 13:02:40 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Knowledge discovery from data is an inherently iterative process. That is, what we know about the data greatly determines our expectations, and therefore, what results we would find interesting and/or surprising. Given new knowledge about the data, our expectations will change. Hence, in order to avoid redundant results, knowledge discovery algorithms ideally should follow such an iterative updating procedure. With this in mind, we introduce a well-founded approach for succinctly summarizing data with the most informative itemsets; using a probabilistic maximum entropy model, we iteratively find the itemset that provides us the most novel information---that is, for which the frequency in the data surprises us the most---and in turn we update our model accordingly.
As we use the maximum entropy principle to obtain unbiased probabilistic models, and only include those itemsets that are most informative with regard to the current model, the summaries we construct are guaranteed to be both descriptive and nonredundant. The algorithm that we present, called mtv, can either discover the top-$k$ most informative itemsets, or we can employ either the Bayesian Information Criterion (BIC) or the Minimum Description Length (MDL) principle to automatically identify the set of itemsets that together summarize the data well. In other words, our method will ``tell you what you need to know'' about the data. Importantly, it is a one-phase algorithm: rather than picking itemsets from a user-provided candidate set, itemsets and their supports are mined on-the-fly. To further its applicability, we provide an efficient method to compute the maximum entropy distribution using Quick Inclusion-Exclusion. Experiments on our method, using synthetic, benchmark, and real data, show that the discovered summaries are succinct, and correctly identify the key patterns in the data. The models they form attain high likelihoods, and inspection shows that they summarize the data well with increasingly specific, yet nonredundant itemsets.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov.
Data", articleno = "16", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chu:2012:TLM, author = "Shumo Chu and James Cheng", title = "Triangle listing in massive networks", journal = j-TKDD, volume = "6", number = "4", pages = "17:1--17:??", month = dec, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2382577.2382581", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jun 24 13:02:40 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Triangle listing is one of the fundamental algorithmic problems whose solution has numerous applications especially in the analysis of complex networks, such as the computation of clustering coefficients, transitivity, triangular connectivity, trusses, etc. Existing algorithms for triangle listing are mainly in-memory algorithms, whose performance cannot scale with the massive volume of today's fast growing networks. When the input graph cannot fit in main memory, triangle listing requires random disk accesses that can incur prohibitively huge I/O cost. Some streaming, semistreaming, and sampling algorithms have been proposed but these are approximation algorithms. We propose an I/O-efficient algorithm for triangle listing. Our algorithm is exact and avoids random disk access. Our results show that our algorithm is scalable and outperforms the state-of-the-art in-memory and local triangle estimation algorithms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "17", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chattopadhyay:2012:MDA, author = "Rita Chattopadhyay and Qian Sun and Wei Fan and Ian Davidson and Sethuraman Panchanathan and Jieping Ye", title = "Multisource domain adaptation and its application to early detection of fatigue", journal = j-TKDD, volume = "6", number = "4", pages = "18:1--18:??", month = dec, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2382577.2382582", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jun 24 13:02:40 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "We consider the characterization of muscle fatigue through a noninvasive sensing mechanism such as Surface ElectroMyoGraphy (SEMG). While changes in the properties of SEMG signals with respect to muscle fatigue have been reported in the literature, the large variation in these signals across different individuals makes the task of modeling and classification of SEMG signals challenging. Indeed, the variation in SEMG parameters from subject to subject creates differences in the data distribution. In this article, we propose two transfer learning frameworks based on the multisource domain adaptation methodology for detecting different stages of fatigue using SEMG signals, that addresses the distribution differences. In the proposed frameworks, the SEMG data of a subject represent a domain; data from multiple subjects in the training set form the multiple source domains and the test subject data form the target domain. SEMG signals are predominantly different in conditional probability distribution across subjects. 
The key feature of the first framework is a novel weighting scheme that addresses the conditional probability distribution differences across multiple domains (subjects) and the key feature of the second framework is a two-stage domain adaptation methodology which combines weighted data from multiple sources based on marginal probability differences (first stage) as well as conditional probability differences (second stage), with the target domain data. The weights for minimizing the marginal probability differences are estimated independently, while the weights for minimizing conditional probability differences are computed simultaneously by exploiting the potential interaction among multiple sources. We also provide a theoretical analysis on the generalization performance of the proposed multisource domain adaptation formulation using the weighted Rademacher complexity measure. We have validated the proposed frameworks on Surface ElectroMyoGram signals collected from 8 people during a fatigue-causing repetitive gripping activity. Comprehensive experiments on the SEMG dataset demonstrate that the proposed method improves the classification accuracy by 20\% to 30\% over the cases without any domain adaptation method and by 13\% to 30\% over existing state-of-the-art domain adaptation methods.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "18", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wilkinson:2012:SIS, author = "Leland Wilkinson and Anushka Anand and Tuan Nhon Dang", title = "Substantial improvements in the set-covering projection classifier {CHIRP} (composite hypercubes on iterated random projections)", journal = j-TKDD, volume = "6", number = "4", pages = "19:1--19:??", month = dec, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2382577.2382583", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jun 24 13:02:40 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "In Wilkinson et al. [2011] we introduced a new set-covering random projection classifier that achieved average error lower than that of other classifiers in the Weka platform. This classifier was based on an $ L^\infty $ norm distance function and exploited an iterative sequence of three stages (projecting, binning, and covering) to deal with the curse of dimensionality, computational complexity, and nonlinear separability. We now present substantial changes that improve robustness and reduce training and testing time by almost an order of magnitude without jeopardizing CHIRP's outstanding error performance.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "19", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Angiulli:2013:NNB, author = "Fabrizio Angiulli and Fabio Fassetti", title = "Nearest Neighbor-Based Classification of Uncertain Data", journal = j-TKDD, volume = "7", number = "1", pages = "1:1--1:??", month = mar, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2435209.2435210", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jun 24 13:02:44 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "This work deals with the problem of classifying uncertain data. With this aim we introduce the Uncertain Nearest Neighbor (UNN) rule, which represents the generalization of the deterministic nearest neighbor rule to the case in which uncertain objects are available. The UNN rule relies on the concept of nearest neighbor class, rather than on that of nearest neighbor object. The nearest neighbor class of a test object is the class that maximizes the probability of providing its nearest neighbor. The evidence is that the former concept is much more powerful than the latter in the presence of uncertainty, in that it correctly models the right semantics of the nearest neighbor decision rule when applied to the uncertain scenario. An effective and efficient algorithm to perform uncertain nearest neighbor classification of a generic (un)certain test object is designed, based on properties that greatly reduce the temporal cost associated with nearest neighbor class probability computation. Experimental results are presented, showing that the UNN rule is effective and efficient in classifying uncertain data.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "1", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2013:CDS, author = "Dingding Wang and Shenghuo Zhu and Tao Li and Yihong Gong", title = "Comparative Document Summarization via Discriminative Sentence Selection", journal = j-TKDD, volume = "7", number = "1", pages = "2:1--2:??", month = mar, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2435209.2435211", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jun 24 13:02:44 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Given a collection of document groups, a natural question is to identify the differences among these groups. Although traditional document summarization techniques can summarize the content of the document groups one by one, there exists a great necessity to generate a summary of the differences among the document groups. In this article, we study a novel problem of summarizing the differences between document groups. A discriminative sentence selection method is proposed to extract the most discriminative sentences that represent the specific characteristics of each document group. Experiments and case studies on real-world data sets demonstrate the effectiveness of our proposed method.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "2", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Bayati:2013:MPA, author = "Mohsen Bayati and David F. 
Gleich and Amin Saberi and Ying Wang", title = "Message-Passing Algorithms for Sparse Network Alignment", journal = j-TKDD, volume = "7", number = "1", pages = "3:1--3:??", month = mar, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2435209.2435212", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jun 24 13:02:44 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Network alignment generalizes and unifies several approaches for forming a matching or alignment between the vertices of two graphs. We study a mathematical programming framework for network alignment problem and a sparse variation of it where only a small number of matches between the vertices of the two graphs are possible. We propose a new message passing algorithm that allows us to compute, very efficiently, approximate solutions to the sparse network alignment problems with graph sizes as large as hundreds of thousands of vertices. We also provide extensive simulations comparing our algorithms with two of the best solvers for network alignment problems on two synthetic matching problems, two bioinformatics problems, and three large ontology alignment problems including a multilingual problem with a known labeled alignment.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "3", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2013:CWM, author = "Bin Li and Steven C. H. 
Hoi and Peilin Zhao and Vivekanand Gopalkrishnan", title = "Confidence Weighted Mean Reversion Strategy for Online Portfolio Selection", journal = j-TKDD, volume = "7", number = "1", pages = "4:1--4:??", month = mar, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2435209.2435213", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jun 24 13:02:44 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Online portfolio selection has been attracting increasing attention from the data mining and machine learning communities. All existing online portfolio selection strategies focus on the first order information of a portfolio vector, though the second order information may also be beneficial to a strategy. Moreover, empirical evidence shows that relative stock prices may follow the mean reversion property, which has not been fully exploited by existing strategies. This article proposes a novel online portfolio selection strategy named Confidence Weighted Mean Reversion (CWMR). Inspired by the mean reversion principle in finance and confidence weighted online learning technique in machine learning, CWMR models the portfolio vector as a Gaussian distribution, and sequentially updates the distribution by following the mean reversion trading principle. CWMR's closed-form updates clearly reflect the mean reversion trading idea. We also present several variants of CWMR algorithms, including a CWMR mixture algorithm that is theoretical universal. Empirically, CWMR strategy is able to effectively exploit the power of mean reversion for online portfolio selection. Extensive experiments on various real markets show that the proposed strategy is superior to the state-of-the-art techniques. The experimental testbed including source codes and data sets is available online.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "4", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Lou:2013:LPR, author = "Tiancheng Lou and Jie Tang and John Hopcroft and Zhanpeng Fang and Xiaowen Ding", title = "Learning to predict reciprocity and triadic closure in social networks", journal = j-TKDD, volume = "7", number = "2", pages = "5:1--5:??", month = jul, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2499907.2499908", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 13 09:16:06 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "We study how links are formed in social networks. In particular, we focus on investigating how a reciprocal (two-way) link, the basic relationship in social networks, is developed from a parasocial (one-way) relationship and how the relationships further develop into triadic closure, one of the fundamental processes of link formation. We first investigate how geographic distance and interactions between users influence the formation of link structure among users. Then we study how social theories including homophily, social balance, and social status are satisfied over networks with parasocial and reciprocal relationships. The study unveils several interesting phenomena. For example, ``friend's friend is a friend'' indeed exists in the reciprocal relationship network, but does not hold in the parasocial relationship network. We propose a learning framework to formulate the problems of predicting reciprocity and triadic closure into a graphical model. We demonstrate that it is possible to accurately infer 90\% of reciprocal relationships in a Twitter network. 
The proposed model also achieves better performance (+20--30\% in terms of F1-measure) than several alternative methods for predicting the triadic closure formation.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "5", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yang:2013:EOL, author = "Haiqin Yang and Michael R. Lyu and Irwin King", title = "Efficient online learning for multitask feature selection", journal = j-TKDD, volume = "7", number = "2", pages = "6:1--6:??", month = jul, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2499907.2499909", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 13 09:16:06 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Learning explanatory features across multiple related tasks, or MultiTask Feature Selection (MTFS), is an important problem in the applications of data mining, machine learning, and bioinformatics. Previous MTFS methods fulfill this task by batch-mode training. This makes them inefficient when data come sequentially or when the number of training data is so large that they cannot be loaded into the memory simultaneously. In order to tackle these problems, we propose a novel online learning framework to solve the MTFS problem. A main advantage of the online algorithm is its efficiency in both time complexity and memory cost. The weights of the MTFS models at each iteration can be updated by closed-form solutions based on the average of previous subgradients. This yields the worst-case bounds of the time complexity and memory cost at each iteration, both in the order of $ O(d \times Q) $, where $d$ is the number of feature dimensions and $Q$ is the number of tasks. 
Moreover, we provide theoretical analysis for the average regret of the online learning algorithms, which also guarantees the convergence rate of the algorithms. Finally, we conduct detailed experiments to show the characteristics and merits of the online learning algorithms in solving several MTFS problems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "6", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2013:MRL, author = "Yu Zhang and Dit-Yan Yeung", title = "Multilabel relationship learning", journal = j-TKDD, volume = "7", number = "2", pages = "7:1--7:??", month = jul, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2499907.2499910", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 13 09:16:06 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Multilabel learning problems are commonly found in many applications. A characteristic shared by many multilabel learning problems is that some labels have significant correlations between them. In this article, we propose a novel multilabel learning method, called MultiLabel Relationship Learning (MLRL), which extends the conventional support vector machine by explicitly learning and utilizing the relationships between labels. Specifically, we model the label relationships using a label covariance matrix and use it to define a new regularization term for the optimization problem. MLRL learns the model parameters and the label covariance matrix simultaneously based on a unified convex formulation. To solve the convex optimization problem, we use an alternating method in which each subproblem can be solved efficiently. The relationship between MLRL and two widely used maximum margin methods for multilabel learning is investigated. 
Moreover, we also propose a semisupervised extension of MLRL, called SSMLRL, to demonstrate how to make use of unlabeled data to help learn the label covariance matrix. Through experiments conducted on some multilabel applications, we find that MLRL not only gives higher classification accuracy but also has better interpretability as revealed by the label covariance matrix.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "7", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Peng:2013:EFF, author = "Jing Peng and Guna Seetharaman and Wei Fan and Aparna Varde", title = "Exploiting {Fisher} and {Fukunaga--Koontz} transforms in {Chernoff} dimensionality reduction", journal = j-TKDD, volume = "7", number = "2", pages = "8:1--8:??", month = jul, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2499907.2499911", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 13 09:16:06 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Knowledge discovery from big data demands effective representation of data. However, big data are often characterized by high dimensionality, which makes knowledge discovery more difficult. Many techniques for dimensionality reduction have been proposed, including well-known Fisher's Linear Discriminant Analysis (LDA). However, the Fisher criterion is incapable of dealing with heteroscedasticity in the data. A technique based on the Chernoff criterion for linear dimensionality reduction has been proposed that is capable of exploiting heteroscedastic information in the data. While the Chernoff criterion has been shown to outperform the Fisher's, a clear understanding of its exact behavior is lacking. In this article, we show precisely what can be expected from the Chernoff criterion. 
In particular, we show that the Chernoff criterion exploits the Fisher and Fukunaga--Koontz transforms in computing its linear discriminants. Furthermore, we show that a recently proposed decomposition of the data space into four subspaces is incomplete. We provide arguments on how to best enrich the decomposition of the data space in order to account for heteroscedasticity in the data. Finally, we provide experimental results validating our theoretical analysis.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "8", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Agarwal:2013:ISI, author = "Deepak Agarwal and Rich Caruana and Jian Pei and Ke Wang", title = "Introduction to the {Special Issue ACM SIGKDD 2012}", journal = j-TKDD, volume = "7", number = "3", pages = "9:1--9:??", month = sep, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2513092.2513093", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 13 09:16:07 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov.
Data", articleno = "9", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Rakthanmanon:2013:ABD, author = "Thanawin Rakthanmanon and Bilson Campana and Abdullah Mueen and Gustavo Batista and Brandon Westover and Qiang Zhu and Jesin Zakaria and Eamonn Keogh", title = "Addressing Big Data Time Series: Mining Trillions of Time Series Subsequences Under Dynamic Time Warping", journal = j-TKDD, volume = "7", number = "3", pages = "10:1--10:??", month = sep, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2500489", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 13 09:16:07 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Most time series data mining algorithms use similarity search as a core subroutine, and thus the time taken for similarity search is the bottleneck for virtually all time series data mining algorithms, including classification, clustering, motif discovery, anomaly detection, and so on. The difficulty of scaling a search to large datasets explains to a great extent why most academic work on time series data mining has plateaued at considering a few millions of time series objects, while much of industry and science sits on billions of time series objects waiting to be explored. In this work we show that by using a combination of four novel ideas we can search and mine massive time series for the first time. We demonstrate the following unintuitive fact: in large datasets we can exactly search under Dynamic Time Warping (DTW) much more quickly than the current state-of-the-art Euclidean distance search algorithms. We demonstrate our work on the largest set of time series experiments ever attempted. 
In particular, the largest dataset we consider is larger than the combined size of all of the time series datasets considered in all data mining papers ever published. We explain how our ideas allow us to solve higher-level time series data mining problems such as motif discovery and clustering at scales that would otherwise be untenable. Moreover, we show how our ideas allow us to efficiently support the uniform scaling distance measure, a measure whose utility seems to be underappreciated, but which we demonstrate here. In addition to mining massive datasets with up to one trillion datapoints, we will show that our ideas also have implications for real-time monitoring of data streams, allowing us to handle much faster arrival rates and/or use cheaper and lower powered devices than are currently possible.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "10", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Sun:2013:PIM, author = "Yizhou Sun and Brandon Norick and Jiawei Han and Xifeng Yan and Philip S. Yu and Xiao Yu", title = "{PathSelClus}: Integrating Meta-Path Selection with User-Guided Object Clustering in Heterogeneous Information Networks", journal = j-TKDD, volume = "7", number = "3", pages = "11:1--11:??", month = sep, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2500492", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 13 09:16:07 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Real-world, multiple-typed objects are often interconnected, forming heterogeneous information networks. A major challenge for link-based clustering in such networks is their potential to generate many different results, carrying rather diverse semantic meanings. 
In order to generate desired clustering, we propose to use meta-path, a path that connects object types via a sequence of relations, to control clustering with distinct semantics. Nevertheless, it is easier for a user to provide a few examples (seeds) than a weighted combination of sophisticated meta-paths to specify her clustering preference. Thus, we propose to integrate meta-path selection with user-guided clustering to cluster objects in networks, where a user first provides a small set of object seeds for each cluster as guidance. Then the system learns the weight for each meta-path that is consistent with the clustering result implied by the guidance, and generates clusters under the learned weights of meta-paths. A probabilistic approach is proposed to solve the problem, and an effective and efficient iterative algorithm, PathSelClus, is proposed to learn the model, where the clustering quality and the meta-path weights mutually enhance each other. Our experiments with several clustering tasks in two real networks and one synthetic network demonstrate the power of the algorithm in comparison with the baselines.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "11", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Bellare:2013:ASE, author = "Kedar Bellare and Suresh Iyengar and Aditya Parameswaran and Vibhor Rastogi", title = "Active Sampling for Entity Matching with Guarantees", journal = j-TKDD, volume = "7", number = "3", pages = "12:1--12:??", month = sep, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2500490", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 13 09:16:07 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "In entity matching, a fundamental issue while training a classifier to label pairs of entities as either duplicates or nonduplicates is the one of selecting informative training examples. Although active learning presents an attractive solution to this problem, previous approaches minimize the misclassification rate (0--1 loss) of the classifier, which is an unsuitable metric for entity matching due to class imbalance (i.e., many more nonduplicate pairs than duplicate pairs). To address this, a recent paper [Arasu et al. 2010] proposes to maximize recall of the classifier under the constraint that its precision should be greater than a specified threshold. However, the proposed technique requires the labels of all $n$ input pairs in the worst case. Our main result is an active learning algorithm that approximately maximizes recall of the classifier while respecting a precision constraint with provably sublinear label complexity (under certain distributional assumptions). Our algorithm uses as a black box any active learning module that minimizes 0--1 loss.
We show that label complexity of our algorithm is at most $ \log n $ times the label complexity of the black box, and also bound the difference in the recall of classifier learnt by our algorithm and the recall of the optimal classifier satisfying the precision constraint. We provide an empirical evaluation of our algorithm on several real-world matching data sets that demonstrates the effectiveness of our approach.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "12", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chattopadhyay:2013:BMA, author = "Rita Chattopadhyay and Zheng Wang and Wei Fan and Ian Davidson and Sethuraman Panchanathan and Jieping Ye", title = "Batch Mode Active Sampling Based on Marginal Probability Distribution Matching", journal = j-TKDD, volume = "7", number = "3", pages = "13:1--13:??", month = sep, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2513092.2513094", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 13 09:16:07 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Active Learning is a machine learning and data mining technique that selects the most informative samples for labeling and uses them as training data; it is especially useful when there are large amount of unlabeled data and labeling them is expensive. Recently, batch-mode active learning, where a set of samples are selected concurrently for labeling, based on their collective merit, has attracted a lot of attention. The objective of batch-mode active learning is to select a set of informative samples so that a classifier learned on these samples has good generalization performance on the unlabeled data.
Most of the existing batch-mode active learning methodologies try to achieve this by selecting samples based on certain criteria. In this article we propose a novel criterion which achieves good generalization performance of a classifier by specifically selecting a set of query samples that minimize the difference in distribution between the labeled and the unlabeled data, after annotation. We explicitly measure this difference based on all candidate subsets of the unlabeled data and select the best subset. The proposed objective is an NP-hard integer programming optimization problem. We provide two optimization techniques to solve this problem. In the first one, the problem is transformed into a convex quadratic programming problem and in the second method the problem is transformed into a linear programming problem. Our empirical studies using publicly available UCI datasets and two biomedical image databases demonstrate the effectiveness of the proposed approach in comparison with the state-of-the-art batch-mode active learning methods. We also present two extensions of the proposed approach, which incorporate uncertainty of the predicted labels of the unlabeled data and transfer learning in the proposed formulation. In addition, we present a joint optimization framework for performing both transfer and active learning simultaneously unlike the existing approaches of learning in two separate stages, that is, typically, transfer learning followed by active learning. We specifically minimize a common objective of reducing distribution difference between the domain adapted source, the queried and labeled samples and the rest of the unlabeled target domain data. Our empirical studies on two biomedical image databases and on a publicly available 20 Newsgroups dataset show that incorporation of uncertainty information and transfer learning further improves the performance of the proposed active learning based classifier. 
Our empirical studies also show that the proposed transfer-active method based on the joint optimization framework performs significantly better than a framework which implements transfer and active learning in two separate stages.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "13", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Briggs:2013:IAM, author = "Forrest Briggs and Xiaoli Z. Fern and Raviv Raich and Qi Lou", title = "Instance Annotation for Multi-Instance Multi-Label Learning", journal = j-TKDD, volume = "7", number = "3", pages = "14:1--14:??", month = sep, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2500491", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 13 09:16:07 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Multi-instance multi-label learning (MIML) is a framework for supervised classification where the objects to be classified are bags of instances associated with multiple labels. For example, an image can be represented as a bag of segments and associated with a list of objects it contains. Prior work on MIML has focused on predicting label sets for previously unseen bags. We instead consider the problem of predicting instance labels while learning from data labeled only at the bag level. We propose a regularized rank-loss objective designed for instance annotation, which can be instantiated with different aggregation models connecting instance-level labels with bag-level label sets. The aggregation models that we consider can be factored as a linear function of a ``support instance'' for each class, which is a single feature vector representing a whole bag. Hence we name our proposed methods rank-loss Support Instance Machines (SIM). 
We propose two optimization methods for the rank-loss objective, which is nonconvex. One is a heuristic method that alternates between updating support instances, and solving a convex problem in which the support instances are treated as constant. The other is to apply the constrained concave-convex procedure (CCCP), which can also be interpreted as iteratively updating support instances and solving a convex problem. To solve the convex problem, we employ the Pegasos framework of primal subgradient descent, and prove that it finds an $ \epsilon $-suboptimal solution in runtime that is linear in the number of bags, instances, and $ 1 / \epsilon $. Additionally, we suggest a method of extending the linear learning algorithm to nonlinear classification, without increasing the runtime asymptotically. Experiments on artificial and real-world datasets including images and audio show that the proposed methods achieve higher accuracy than other loss functions used in prior work, e.g., Hamming loss, and recent work in ambiguous label classification.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "14", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ji:2013:PFR, author = "Ming Ji and Binbin Lin and Xiaofei He and Deng Cai and Jiawei Han", title = "Parallel Field Ranking", journal = j-TKDD, volume = "7", number = "3", pages = "15:1--15:??", month = sep, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2513092.2513096", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 13 09:16:07 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Recently, ranking data with respect to the intrinsic geometric structure (manifold ranking) has received considerable attentions, with encouraging performance in many applications in pattern recognition, information retrieval and recommendation systems. Most of the existing manifold ranking methods focus on learning a ranking function that varies smoothly along the data manifold. However, beyond smoothness, a desirable ranking function should vary monotonically along the geodesics of the data manifold, such that the ranking order along the geodesics is preserved. In this article, we aim to learn a ranking function that varies linearly and therefore monotonically along the geodesics of the data manifold. Recent theoretical work shows that the gradient field of a linear function on the manifold has to be a parallel vector field. Therefore, we propose a novel ranking algorithm on the data manifolds, called Parallel Field Ranking. Specifically, we try to learn a ranking function and a vector field simultaneously. We require the vector field to be close to the gradient field of the ranking function, and the vector field to be as parallel as possible. Moreover, we require the value of the ranking function at the query point to be the highest, and then decrease linearly along the manifold. 
Experimental results on both synthetic data and real data demonstrate the effectiveness of our proposed algorithm.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "15", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Adali:2013:IPR, author = "Sibel Adali and Malik Magdon-Ismail and Xiaohui Lu", title = "{iHypR}: Prominence ranking in networks of collaborations with hyperedges", journal = j-TKDD, volume = "7", number = "4", pages = "16:1--16:??", month = nov, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2541268.2541269", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 13 09:16:09 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "We present a new algorithm called iHypR for computing prominence of actors in social networks of collaborations. Our algorithm builds on the assumption that prominent actors collaborate on prominent objects, and prominent objects are naturally grouped into prominent clusters or groups (hyperedges in a graph). iHypR makes use of the relationships between actors, objects, and hyperedges to compute a global prominence score for the actors in the network. We do not assume the hyperedges are given in advance. Hyperedges computed by our method can perform as well or even better than ``true'' hyperedges. Our algorithm is customized for networks of collaborations, but it is generally applicable without further tuning. We show, through extensive experimentation with three real-life data sets and multiple external measures of prominence, that our algorithm outperforms existing well-known algorithms. Our work is the first to offer such an extensive evaluation. We show that unlike most existing algorithms, the performance is robust across multiple measures of performance.
Further, we give a detailed study of the sensitivity of our algorithm to different data sets and the design choices within the algorithm that a user may wish to change. Our article illustrates the various trade-offs that must be considered in computing prominence in collaborative social networks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "16", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Huang:2013:STP, author = "Jin Huang and Feiping Nie and Heng Huang and Yi-Cheng Tu and Yu Lei", title = "Social trust prediction using heterogeneous networks", journal = j-TKDD, volume = "7", number = "4", pages = "17:1--17:??", month = nov, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2541268.2541270", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 13 09:16:09 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Along with increasing popularity of social websites, online users rely more on the trustworthiness information to make decisions, extract and filter information, and tag and build connections with other users. However, such social network data often suffer from severe data sparsity and are not able to provide users with enough information. Therefore, trust prediction has emerged as an important topic in social network research. Traditional approaches are primarily based on exploring trust graph topology itself. However, research in sociology and our life experience suggest that people who are in the same social circle often exhibit similar behaviors and tastes. To take advantage of the ancillary information for trust prediction, the challenge then becomes what to transfer and how to transfer. 
In this article, we address this problem by aggregating heterogeneous social networks and propose a novel joint social networks mining (JSNM) method. Our new joint learning model explores the user-group-level similarity between correlated graphs and simultaneously learns the individual graph structure; therefore, the shared structures and patterns from multiple social networks can be utilized to enhance the prediction tasks. As a result, we not only improve the trust prediction in the target graph but also facilitate other information retrieval tasks in the auxiliary graphs. To optimize the proposed objective function, we use the alternative technique to break down the objective function into several manageable subproblems. We further introduce the auxiliary function to solve the optimization problems with rigorously proved convergence. The extensive experiments have been conducted on both synthetic and real-world data. All empirical results demonstrate the effectiveness of our method.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov.
Data", articleno = "17", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Guzzo:2013:SIF, author = "Antonella Guzzo and Luigi Moccia and Domenico Sacc{\`a} and Edoardo Serra", title = "Solving inverse frequent itemset mining with infrequency constraints via large-scale linear programs", journal = j-TKDD, volume = "7", number = "4", pages = "18:1--18:??", month = nov, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2541268.2541271", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 13 09:16:09 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Inverse frequent set mining (IFM) is the problem of computing a transaction database D satisfying given support constraints for some itemsets, which are typically the frequent ones. This article proposes a new formulation of IFM, called IFM$_I$ (IFM with infrequency constraints), where the itemsets that are not listed as frequent are constrained to be infrequent; that is, they must have a support less than or equal to a specified unique threshold. An instance of IFM$_I$ can be seen as an instance of the original IFM by making explicit the infrequency constraints for the minimal infrequent itemsets, corresponding to the so-called negative generator border defined in the literature. The complexity increase from PSPACE (complexity of IFM) to NEXP (complexity of IFM$_I$) is caused by the cardinality of the negative generator border, which can be exponential in the original input size. Therefore, the article introduces a specific problem parameter $ \kappa $ that computes an upper bound to this cardinality using a hypergraph interpretation for which minimal infrequent itemsets correspond to minimal transversals. 
By fixing a constant $k$, the article formulates a $k$-bounded definition of the problem, called $k$-IFM$_I$, that collects all instances for which the value of the parameter $ \kappa $ is less than or equal to $k$---its complexity is in PSPACE as for IFM. The bounded problem is encoded as an integer linear program with a large number of variables (actually exponential w.r.t. the number of constraints), which is thereafter approximated by relaxing integer constraints---the decision problem of solving the linear program is proven to be in NP. In order to solve the linear program, a column generation technique is used that is a variation of the simplex method designed to solve large-scale linear programs, in particular with a huge number of variables. The method at each step requires the solution of an auxiliary integer linear program, which is proven to be NP hard in this case and for which a greedy heuristic is presented. The resulting overall column generation solution algorithm enjoys very good scaling as evidenced by the intensive experimentation, thereby paving the way for its application in real-life scenarios.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "18", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Balcazar:2013:FCP, author = "Jos{\'e} L.
Balc{\'a}zar", title = "Formal and computational properties of the confidence boost of association rules", journal = j-TKDD, volume = "7", number = "4", pages = "19:1--19:??", month = nov, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2541268.2541272", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 13 09:16:09 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Some existing notions of redundancy among association rules allow for a logical-style characterization and lead to irredundant bases of absolutely minimum size. We push the intuition of redundancy further to find an intuitive notion of novelty of an association rule, with respect to other rules. Namely, an irredundant rule is so because its confidence is higher than what the rest of the rules would suggest; then, one can ask: how much higher? We propose to measure such a sort of novelty through the confidence boost of a rule. Acting as a complement to confidence and support, the confidence boost helps to obtain small and crisp sets of mined association rules and solves the well-known problem that, in certain cases, rules of negative correlation may pass the confidence bound. We analyze the properties of two versions of the notion of confidence boost, one of them a natural generalization of the other. We develop algorithms to filter rules according to their confidence boost, compare the concept to some similar notions in the literature, and describe the results of some experimentation employing the new notions on standard benchmark datasets. We describe an open source association mining tool that embodies one of our variants of confidence boost in such a way that the data mining process does not require the user to select any value for any parameter.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "19", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ang:2013:CPN, author = "Hock Hee Ang and Vivekanand Gopalkrishnan and Steven C. H. Hoi and Wee Keong Ng", title = "Classification in {P2P} networks with cascade support vector machines", journal = j-TKDD, volume = "7", number = "4", pages = "20:1--20:??", month = nov, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2541268.2541273", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 13 09:16:09 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Classification in Peer-to-Peer (P2P) networks is important to many real applications, such as distributed intrusion detection, distributed recommendation systems, and distributed antispam detection. However, it is very challenging to perform classification in P2P networks due to many practical issues, such as scalability, peer dynamism, and asynchronism. This article investigates the practical techniques of constructing Support Vector Machine (SVM) classifiers in the P2P networks. In particular, we demonstrate how to efficiently cascade SVM in a P2P network with the use of reduced SVM. In addition, we propose to fuse the concept of cascade SVM with bootstrap aggregation to effectively balance the trade-off between classification accuracy, model construction, and prediction cost. We provide theoretical insights for the proposed solutions and conduct an extensive set of empirical studies on a number of large-scale datasets. Encouraging results validate the efficacy of the proposed approach.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "20", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chen:2014:ISI, author = "Wei Chen and Jie Tang", title = "Introduction to special issue on computational aspects of social and information networks: Theory, methodologies, and applications {(TKDD-CASIN)}", journal = j-TKDD, volume = "8", number = "1", pages = "1:1--1:??", month = feb, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2556608", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 13 09:16:11 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "1", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yang:2014:USN, author = "Zhi Yang and Christo Wilson and Xiao Wang and Tingting Gao and Ben Y. Zhao and Yafei Dai", title = "Uncovering social network {Sybils} in the wild", journal = j-TKDD, volume = "8", number = "1", pages = "2:1--2:??", month = feb, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2556609", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 13 09:16:11 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Sybil accounts are fake identities created to unfairly increase the power or resources of a single malicious user. Researchers have long known about the existence of Sybil accounts in online communities such as file-sharing systems, but they have not been able to perform large-scale measurements to detect them or measure their activities. 
In this article, we describe our efforts to detect, characterize, and understand Sybil account activity in the Renren Online Social Network (OSN). We use ground truth provided by Renren Inc. to build measurement-based Sybil detectors and deploy them on Renren to detect more than 100,000 Sybil accounts. Using our full dataset of 650,000 Sybils, we examine several aspects of Sybil behavior. First, we study their link creation behavior and find that contrary to prior conjecture, Sybils in OSNs do not form tight-knit communities. Next, we examine the fine-grained behaviors of Sybils on Renren using clickstream data. Third, we investigate behind-the-scenes collusion between large groups of Sybils. Our results reveal that Sybils with no explicit social ties still act in concert to launch attacks. Finally, we investigate enhanced techniques to identify stealthy Sybils. In summary, our study advances the understanding of Sybil behavior on OSNs and shows that Sybils can effectively avoid existing community-based Sybil detectors. We hope that our results will foster new research on Sybil detection that is based on novel types of Sybil features.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "2", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Jin:2014:SAR, author = "Ruoming Jin and Victor E. 
Lee and Longjie Li", title = "Scalable and axiomatic ranking of network role similarity", journal = j-TKDD, volume = "8", number = "1", pages = "3:1--3:??", month = feb, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2518176", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 13 09:16:11 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "A key task in analyzing social networks and other complex networks is role analysis: describing and categorizing nodes according to how they interact with other nodes. Two nodes have the same role if they interact with equivalent sets of neighbors. The most fundamental role equivalence is automorphic equivalence. Unfortunately, the fastest algorithms known for graph automorphism are nonpolynomial. Moreover, since exact equivalence is rare, a more meaningful task is measuring the role similarity between any two nodes. This task is closely related to the structural or link-based similarity problem that SimRank addresses. However, SimRank and other existing similarity measures are not sufficient because they do not guarantee to recognize automorphically or structurally equivalent nodes. This article makes two contributions. First, we present and justify several axiomatic properties necessary for a role similarity measure or metric. Second, we present RoleSim, a new similarity metric that satisfies these axioms and can be computed with a simple iterative algorithm. We rigorously prove that RoleSim satisfies all of these axiomatic properties. We also introduce Iceberg RoleSim, a scalable algorithm that discovers all pairs with RoleSim scores above a user-defined threshold $ \theta $. We demonstrate the interpretative power of RoleSim on both synthetic and real datasets.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "3", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Mcauley:2014:DSC, author = "Julian McAuley and Jure Leskovec", title = "Discovering social circles in ego networks", journal = j-TKDD, volume = "8", number = "1", pages = "4:1--4:??", month = feb, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2556612", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 13 09:16:11 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "People's personal social networks are big and cluttered, and currently there is no good way to automatically organize them. Social networking sites allow users to manually categorize their friends into social circles (e.g., ``circles'' on Google+, and ``lists'' on Facebook and Twitter). However, circles are laborious to construct and must be manually updated whenever a user's network grows. In this article, we study the novel task of automatically identifying users' social circles. We pose this task as a multimembership node clustering problem on a user's ego network, a network of connections between her friends. We develop a model for detecting circles that combines network structure as well as user profile information. For each circle, we learn its members and the circle-specific user profile similarity metric. Modeling node membership to multiple circles allows us to detect overlapping as well as hierarchically nested circles. Experiments show that our model accurately identifies circles on a diverse set of data from Facebook, Google+, and Twitter, for all of which we obtain hand-labeled ground truth.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov.
Data", articleno = "4", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Abrahao:2014:SFA, author = "Bruno Abrahao and Sucheta Soundarajan and John Hopcroft and Robert Kleinberg", title = "A separability framework for analyzing community structure", journal = j-TKDD, volume = "8", number = "1", pages = "5:1--5:??", month = feb, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2527231", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 13 09:16:11 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Four major factors govern the intricacies of community extraction in networks: (1) the literature offers a multitude of disparate community detection algorithms whose output exhibits high structural variability across the collection, (2) communities identified by algorithms may differ structurally from real communities that arise in practice, (3) there is no consensus characterizing how to discriminate communities from noncommunities, and (4) the application domain includes a wide variety of networks of fundamentally different natures. In this article, we present a class separability framework to tackle these challenges through a comprehensive analysis of community properties. Our approach enables the assessment of the structural dissimilarity among the output of multiple community detection algorithms and between the output of algorithms and communities that arise in practice. In addition, our method provides us with a way to organize the vast collection of community detection algorithms by grouping those that behave similarly. Finally, we identify the most discriminative graph-theoretical properties of community signature and the small subset of properties that account for most of the biases of the different community detection algorithms. 
We illustrate our approach with an experimental analysis, which reveals nuances of the structure of real and extracted communities. In our experiments, we furnish our framework with the output of 10 different community detection procedures, representative of categories of popular algorithms available in the literature, applied to a diverse collection of large-scale real network datasets whose domains span biology, online shopping, and social systems. We also analyze communities identified by annotations that accompany the data, which reflect exemplar communities in various domain. We characterize these communities using a broad spectrum of community properties to produce the different structural classes. As our experiments show that community structure is not a universal concept, our framework enables an informed choice of the most suitable community detection method for identifying communities of a specific type in a given network and allows for a comparison of existing community detection algorithms while guiding the design of new ones.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "5", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhong:2014:UBL, author = "Erheng Zhong and Wei Fan and Qiang Yang", title = "User behavior learning and transfer in composite social networks", journal = j-TKDD, volume = "8", number = "1", pages = "6:1--6:??", month = feb, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2556613", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 13 09:16:11 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Accurate prediction of user behaviors is important for many social media applications, including social marketing, personalization, and recommendation. 
A major challenge lies in that although many previous works model user behavior from only historical behavior logs, the available user behavior data or interactions between users and items in a given social network are usually very limited and sparse (e.g., $ \geq 99.9 \% $ empty), which makes models overfit the rare observations and fail to provide accurate predictions. We observe that many people are members of several social networks in the same time, such as Facebook, Twitter, and Tencent's QQ. Importantly, users' behaviors and interests in different networks influence one another. This provides an opportunity to leverage the knowledge of user behaviors in different networks by considering the overlapping users in different networks as bridges, in order to alleviate the data sparsity problem, and enhance the predictive performance of user behavior modeling. Combining different networks ``simply and naively'' does not work well. In this article, we formulate the problem to model multiple networks as ``adaptive composite transfer'' and propose a framework called ComSoc. ComSoc first selects the most suitable networks inside a composite social network via a hierarchical Bayesian model, parameterized for individual users. It then builds topic models for user behavior prediction using both the relationships in the selected networks and related behavior data. With different relational regularization, we introduce different implementations, corresponding to different ways to transfer knowledge from composite social relations. To handle big data, we have implemented the algorithm using Map/Reduce. We demonstrate that the proposed composite network-based user behavior models significantly improve the predictive accuracy over a number of existing approaches on several real-world applications, including a very large social networking dataset from Tencent Inc.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "6", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ahmed:2014:NSS, author = "Nesreen K. Ahmed and Jennifer Neville and Ramana Kompella", title = "Network Sampling: From Static to Streaming Graphs", journal = j-TKDD, volume = "8", number = "2", pages = "7:1--7:??", month = jun, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2601438", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Jun 26 05:48:22 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Network sampling is integral to the analysis of social, information, and biological networks. Since many real-world networks are massive in size, continuously evolving, and/or distributed in nature, the network structure is often sampled in order to facilitate study. For these reasons, a more thorough and complete understanding of network sampling is critical to support the field of network science. In this paper, we outline a framework for the general problem of network sampling by highlighting the different objectives, population and units of interest, and classes of network sampling methods. In addition, we propose a spectrum of computational models for network sampling methods, ranging from the traditionally studied model based on the assumption of a static domain to a more challenging model that is appropriate for streaming domains. We design a family of sampling methods based on the concept of graph induction that generalize across the full spectrum of computational models (from static to streaming) while efficiently preserving many of the topological properties of the input graphs. Furthermore, we demonstrate how traditional static sampling algorithms can be modified for graph streams for each of the three main classes of sampling methods: node, edge, and topology-based sampling. 
Experimental results indicate that our proposed family of sampling methods more accurately preserve the underlying properties of the graph in both static and streaming domains. Finally, we study the impact of network sampling algorithms on the parameter estimation and performance evaluation of relational classification algorithms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "7", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ge:2014:RMA, author = "Yong Ge and Guofei Jiang and Min Ding and Hui Xiong", title = "Ranking Metric Anomaly in Invariant Networks", journal = j-TKDD, volume = "8", number = "2", pages = "8:1--8:??", month = jun, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2601436", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Jun 26 05:48:22 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "The management of large-scale distributed information systems relies on the effective use and modeling of monitoring data collected at various points in the distributed information systems. A traditional approach to model monitoring data is to discover invariant relationships among the monitoring data. Indeed, we can discover all invariant relationships among all pairs of monitoring data and generate invariant networks, where a node is a monitoring data source (metric) and a link indicates an invariant relationship between two monitoring data. Such an invariant network representation can help system experts to localize and diagnose the system faults by examining those broken invariant relationships and their related metrics, since system faults usually propagate among the monitoring data and eventually lead to some broken invariant relationships. 
However, at one time, there are usually a lot of broken links (invariant relationships) within an invariant network. Without proper guidance, it is difficult for system experts to manually inspect this large number of broken links. To this end, in this article, we propose the problem of ranking metrics according to the anomaly levels for a given invariant network, while this is a nontrivial task due to the uncertainties and the complex nature of invariant networks. Specifically, we propose two types of algorithms for ranking metric anomaly by link analysis in invariant networks. Along this line, we first define two measurements to quantify the anomaly level of each metric, and introduce the mRank algorithm. Also, we provide a weighted score mechanism and develop the gRank algorithm, which involves an iterative process to obtain a score to measure the anomaly levels. In addition, some extended algorithms based on mRank and gRank algorithms are developed by taking into account the probability of being broken as well as noisy links. Finally, we validate all the proposed algorithms on a large number of real-world and synthetic data sets to illustrate the effectiveness and efficiency of different algorithms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov.
Data", articleno = "8", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2014:DGP, author = "Gensheng Zhang and Xiao Jiang and Ping Luo and Min Wang and Chengkai Li", title = "Discovering General Prominent Streaks in Sequence Data", journal = j-TKDD, volume = "8", number = "2", pages = "9:1--9:??", month = jun, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2601439", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Jun 26 05:48:22 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "This article studies the problem of prominent streak discovery in sequence data. Given a sequence of values, a prominent streak is a long consecutive subsequence consisting of only large (small) values, such as consecutive games of outstanding performance in sports, consecutive hours of heavy network traffic, and consecutive days of frequent mentioning of a person in social media. Prominent streak discovery provides insightful data patterns for data analysis in many real-world applications and is an enabling technique for computational journalism. Given its real-world usefulness and complexity, the research on prominent streaks in sequence data opens a spectrum of challenging problems. A baseline approach to finding prominent streaks is a quadratic algorithm that exhaustively enumerates all possible streaks and performs pairwise streak dominance comparison. For more efficient methods, we make the observation that prominent streaks are in fact skyline points in two dimensions---streak interval length and minimum value in the interval. Our solution thus hinges on the idea to separate the two steps in prominent streak discovery: candidate streak generation and skyline operation over candidate streaks.
For candidate generation, we propose the concept of local prominent streak (LPS). We prove that prominent streaks are a subset of LPSs and the number of LPSs is less than the length of a data sequence, in comparison with the quadratic number of candidates produced by the brute-force baseline method. We develop efficient algorithms based on the concept of LPS. The nonlinear local prominent streak (NLPS)-based method considers a superset of LPSs as candidates, and the linear local prominent streak (LLPS)-based method further guarantees to consider only LPSs. The proposed properties and algorithms are also extended for discovering general top-$k$, multisequence, and multidimensional prominent streaks. The results of experiments using multiple real datasets verified the effectiveness of the proposed methods and showed orders of magnitude performance improvement against the baseline method.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "9", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Schifanella:2014:MTD, author = "Claudio Schifanella and K. Sel{\c{c}}uk Candan and Maria Luisa Sapino", title = "Multiresolution Tensor Decompositions with Mode Hierarchies", journal = j-TKDD, volume = "8", number = "2", pages = "10:1--10:??", month = jun, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2532169", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Jun 26 05:48:22 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Tensors (multidimensional arrays) are widely used for representing high-order dimensional data, in applications ranging from social networks, sensor data, and Internet traffic. 
Multiway data analysis techniques, in particular tensor decompositions, allow extraction of hidden correlations among multiway data and thus are key components of many data analysis frameworks. Intuitively, these algorithms can be thought of as multiway clustering schemes, which consider multiple facets of the data in identifying clusters, their weights, and contributions of each data element. Unfortunately, algorithms for fitting multiway models are, in general, iterative and very time consuming. In this article, we observe that, in many applications, there is a priori background knowledge (or metadata) about one or more domain dimensions. This metadata is often in the form of a hierarchy that clusters the elements of a given data facet (or mode). We investigate whether such single-mode data hierarchies can be used to boost the efficiency of tensor decomposition process, without significant impact on the final decomposition quality. We consider each domain hierarchy as a guide to help provide higher- or lower-resolution views of the data in the tensor on demand and we rely on these metadata-induced multiresolution tensor representations to develop a multiresolution approach to tensor decomposition. In this article, we focus on an alternating least squares (ALS)--based implementation of the two most important decomposition models such as the PARAllel FACtors (PARAFAC, which decomposes a tensor into a diagonal tensor and a set of factor matrices) and the Tucker (which produces as result a core tensor and a set of dimension-subspaces matrices). Experiment results show that, when the available metadata is used as a rough guide, the proposed multiresolution method helps fit both PARAFAC and Tucker models with consistent (under different parameters settings) savings in execution time and memory consumption, while preserving the quality of the decomposition.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "10", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Huang:2014:RMN, author = "Jin Huang and Feiping Nie and Heng Huang and Chris Ding", title = "Robust Manifold Nonnegative Matrix Factorization", journal = j-TKDD, volume = "8", number = "3", pages = "11:1--11:??", month = jun, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2601434", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jun 3 13:50:26 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Nonnegative Matrix Factorization (NMF) has been one of the most widely used clustering techniques for exploratory data analysis. However, since each data point enters the objective function with squared residue error, a few outliers with large errors easily dominate the objective function. In this article, we propose a Robust Manifold Nonnegative Matrix Factorization (RMNMF) method using $ l_{2, 1} $-norm and integrating NMF and spectral clustering under the same clustering framework. We also point out the solution uniqueness issue for the existing NMF methods and propose an additional orthonormal constraint to address this problem. With the new constraint, the conventional auxiliary function approach no longer works. We tackle this difficult optimization problem via a novel Augmented Lagrangian Method (ALM)--based algorithm and convert the original constrained optimization problem on one variable into a multivariate constrained problem. The new objective function then can be decomposed into several subproblems that each has a closed-form solution. More importantly, we reveal the connection of our method with robust $K$-means and spectral clustering, and we demonstrate its theoretical significance.
Extensive experiments have been conducted on nine benchmark datasets, and all empirical results show the effectiveness of our method.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "11", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2014:RAL, author = "Yu Zhang and Dit-Yan Yeung", title = "A Regularization Approach to Learning Task Relationships in Multitask Learning", journal = j-TKDD, volume = "8", number = "3", pages = "12:1--12:??", month = jun, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2538028", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jun 3 13:50:26 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Multitask learning is a learning paradigm that seeks to improve the generalization performance of a learning task with the help of some other related tasks. In this article, we propose a regularization approach to learning the relationships between tasks in multitask learning. This approach can be viewed as a novel generalization of the regularized formulation for single-task learning. Besides modeling positive task correlation, our approach---multitask relationship learning (MTRL)---can also describe negative task correlation and identify outlier tasks based on the same underlying principle. By utilizing a matrix-variate normal distribution as a prior on the model parameters of all tasks, our MTRL method has a jointly convex objective function. For efficiency, we use an alternating method to learn the optimal model parameters for each task as well as the relationships between tasks. We study MTRL in the symmetric multitask learning setting and then generalize it to the asymmetric setting as well.
We also discuss some variants of the regularization approach to demonstrate the use of other matrix-variate priors for learning task relationships. Moreover, to gain more insight into our model, we also study the relationships between MTRL and some existing multitask learning methods. Experiments conducted on a toy problem as well as several benchmark datasets demonstrate the effectiveness of MTRL as well as its high interpretability revealed by the task covariance matrix.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "12", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Lin:2014:SCR, author = "Ming Lin and Shifeng Weng and Changshui Zhang", title = "On the Sample Complexity of Random {Fourier} Features for Online Learning: How Many Random {Fourier} Features Do We Need?", journal = j-TKDD, volume = "8", number = "3", pages = "13:1--13:??", month = jun, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2611378", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jun 3 13:50:26 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "We study the sample complexity of random Fourier features for online kernel learning---that is, the number of random Fourier features required to achieve good generalization performance. We show that when the loss function is strongly convex and smooth, online kernel learning with random Fourier features can achieve an $ O (\log T / T) $ bound for the excess risk with only $ O (1 / \lambda^2) $ random Fourier features, where $T$ is the number of training examples and $ \lambda $ is the modulus of strong convexity.
This is a significant improvement compared to the existing result for batch kernel learning that requires $ O(T) $ random Fourier features to achieve a generalization bound $ O(1 / \sqrt T) $. Our empirical study verifies that online kernel learning with a limited number of random Fourier features can achieve similar generalization performance as online learning using full kernel matrix. We also present an enhanced online learning algorithm with random Fourier features that improves the classification performance by multiple passes of training examples and a partial average.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "13", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Eyal:2014:PIM, author = "Ron Eyal and Avi Rosenfeld and Sigal Sina and Sarit Kraus", title = "Predicting and Identifying Missing Node Information in Social Networks", journal = j-TKDD, volume = "8", number = "3", pages = "14:1--14:??", month = jun, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2536775", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Jun 26 05:48:23 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "In recent years, social networks have surged in popularity. One key aspect of social network research is identifying important missing information that is not explicitly represented in the network, or is not visible to all. To date, this line of research typically focused on finding the connections that are missing between nodes, a challenge typically termed as the link prediction problem. This article introduces the missing node identification problem, where missing members in the social network structure must be identified. In this problem, indications of missing nodes are assumed to exist. 
Given these indications and a partial network, we must assess which indications originate from the same missing node and determine the full network structure. Toward solving this problem, we present the missing node identification by spectral clustering algorithm (MISC), an approach based on a spectral clustering algorithm, combined with nodes' pairwise affinity measures that were adopted from link prediction research. We evaluate the performance of our approach in different problem settings and scenarios, using real-life data from Facebook. The results show that our approach has beneficial results and can be effective in solving the missing node identification problem. In addition, this article also presents R-MISC, which uses a sparse matrix representation, efficient algorithms for calculating the nodes' pairwise affinity, and a proprietary dimension reduction technique to enable scaling the MISC algorithm to large networks of more than 100,000 nodes. Last, we consider problem settings where some of the indications are unknown. Two algorithms are suggested for this problem: speculative MISC, based on MISC, and missing link completion, based on classical link prediction literature. We show that speculative MISC outperforms missing link completion.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "14", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Webb:2014:EDM, author = "Geoffrey I. 
Webb and Jilles Vreeken", title = "Efficient Discovery of the Most Interesting Associations", journal = j-TKDD, volume = "8", number = "3", pages = "15:1--15:??", month = jun, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2601433", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Jun 26 05:48:23 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Self-sufficient itemsets have been proposed as an effective approach to summarizing the key associations in data. However, their computation appears highly demanding, as assessing whether an itemset is self-sufficient requires consideration of all pairwise partitions of the itemset into pairs of subsets as well as consideration of all supersets. This article presents the first published algorithm for efficiently discovering self-sufficient itemsets. This branch-and-bound algorithm deploys two powerful pruning mechanisms based on upper bounds on itemset value and statistical significance level. It demonstrates that finding top-$k$ productive and nonredundant itemsets, with postprocessing to identify those that are not independently productive, can efficiently identify small sets of key associations. We present extensive evaluation of the strengths and limitations of the technique, including comparisons with alternative approaches to finding the most interesting associations.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "15", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Shabtai:2014:ODM, author = "Asaf Shabtai and Maya Bercovitch and Lior Rokach and Yuval Elovici", title = "Optimizing Data Misuse Detection", journal = j-TKDD, volume = "8", number = "3", pages = "16:1--16:??", month = jun, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2611520", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jun 3 13:50:26 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Data misuse may be performed by entities such as an organization's employees and business partners who are granted access to sensitive information and misuse their privileges. We assume that users can be either trusted or untrusted. The access of untrusted parties to data objects (e.g., client and patient records) should be monitored in an attempt to detect misuse. However, monitoring data objects is resource intensive and time-consuming and may also cause disturbance or inconvenience to the involved employees. Therefore, the monitored data objects should be carefully selected. In this article, we present two optimization problems carefully designed for selecting specific data objects for monitoring, such that the detection rate is maximized and the monitoring effort is minimized. In the first optimization problem, the goal is to select data objects for monitoring that are accessed by at most c trusted agents while ensuring access to at least k monitored objects by each untrusted agent (both c and k are integer variable). 
As opposed to the first optimization problem, the goal of the second optimization problem is to select monitored data objects that maximize the number of monitored data objects accessed by untrusted agents while ensuring that each trusted agent does not access more than d monitored data objects (d is an integer variable as well). Two efficient heuristic algorithms for solving these optimization problems are proposed, and experiments were conducted simulating different scenarios to evaluate the algorithms' performance. Moreover, we compared the heuristic algorithms' performance to the optimal solution and conducted sensitivity analysis on the three parameters (c, k, and d) and on the ratio between the trusted and untrusted agents.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "16", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Hernandez-Orallo:2014:PRC, author = "Jos{\'e} Hern{\'a}ndez-Orallo", title = "Probabilistic Reframing for Cost-Sensitive Regression", journal = j-TKDD, volume = "8", number = "4", pages = "17:1--17:??", month = aug, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2641758", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Aug 26 17:49:02 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Common-day applications of predictive models usually involve the full use of the available contextual information. When the operating context changes, one may fine-tune the by-default (incontextual) prediction or may even abstain from predicting a value (a reject). Global reframing solutions, where the same function is applied to adapt the estimated outputs to a new cost context, are possible solutions here. 
An alternative approach, which has not been studied in a comprehensive way for regression in the knowledge discovery and data mining literature, is the use of a local (e.g., probabilistic) reframing approach, where decisions are made according to the estimated output and a reliability, confidence, or probability estimation. In this article, we advocate for a simple two-parameter (mean and variance) approach, working with a normal conditional probability density. Given the conditional mean produced by any regression technique, we develop lightweight ``enrichment'' methods that produce good estimates of the conditional variance, which are used by the probabilistic (local) reframing methods. We apply these methods to some very common families of cost-sensitive problems, such as optimal predictions in (auction) bids, asymmetric loss scenarios, and rejection rules.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "17", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Miettinen:2014:MMD, author = "Pauli Miettinen and Jilles Vreeken", title = "{MDL4BMF}: Minimum Description Length for {Boolean} Matrix Factorization", journal = j-TKDD, volume = "8", number = "4", pages = "18:1--18:??", month = oct, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2601437", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Oct 7 18:45:26 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Matrix factorizations---where a given data matrix is approximated by a product of two or more factor matrices---are powerful data mining tools. Among other tasks, matrix factorizations are often used to separate global structure from noise.
This, however, requires solving the ``model order selection problem'' of determining the proper rank of the factorization, that is, to answer where fine-grained structure stops, and where noise starts. Boolean Matrix Factorization (BMF)---where data, factors, and matrix product are Boolean---has in recent years received increased attention from the data mining community. The technique has desirable properties, such as high interpretability and natural sparsity. Yet, so far no method for selecting the correct model order for BMF has been available. In this article, we propose the use of the Minimum Description Length (MDL) principle for this task. Besides solving the problem, this well-founded approach has numerous benefits; for example, it is automatic, does not require a likelihood function, is fast, and, as experiments show, is highly accurate. We formulate the description length function for BMF in general---making it applicable for any BMF algorithm. We discuss how to construct an appropriate encoding: starting from a simple and intuitive approach, we arrive at a highly efficient data-to-model--based encoding for BMF. We extend an existing algorithm for BMF to use MDL to identify the best Boolean matrix factorization, analyze the complexity of the problem, and perform an extensive experimental evaluation to study its behavior.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov.
Data", articleno = "18", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Tang:2014:FSS, author = "Jiliang Tang and Huan Liu", title = "Feature Selection for Social Media Data", journal = j-TKDD, volume = "8", number = "4", pages = "19:1--19:??", month = oct, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2629587", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Oct 7 18:45:26 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Feature selection is widely used in preparing high-dimensional data for effective data mining. The explosive popularity of social media produces massive and high-dimensional data at an unprecedented rate, presenting new challenges to feature selection. Social media data consists of (1) traditional high-dimensional, attribute-value data such as posts, tweets, comments, and images, and (2) linked data that provides social context for posts and describes the relationships between social media users as well as who generates the posts, and so on. The nature of social media also determines that its data is massive, noisy, and incomplete, which exacerbates the already challenging problem of feature selection. In this article, we study a novel feature selection problem of selecting features for social media data with its social context. In detail, we illustrate the differences between attribute-value data and social media data, investigate if linked data can be exploited in a new feature selection framework by taking advantage of social science theories. We design and conduct experiments on datasets from real-world social media Web sites, and the empirical results demonstrate that the proposed framework can significantly improve the performance of feature selection. 
Further experiments are conducted to evaluate the effects of user--user and user--post relationships manifested in linked data on feature selection, and research issues for future work will be discussed.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "19", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Riondato:2014:EDA, author = "Matteo Riondato and Eli Upfal", title = "Efficient Discovery of Association Rules and Frequent Itemsets through Sampling with Tight Performance Guarantees", journal = j-TKDD, volume = "8", number = "4", pages = "20:1--20:??", month = oct, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2629586", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Oct 7 18:45:26 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "The tasks of extracting (top-$K$) Frequent Itemsets (FIs) and Association Rules (ARs) are fundamental primitives in data mining and database applications. Exact algorithms for these problems exist and are widely used, but their running time is hindered by the need of scanning the entire dataset, possibly multiple times. High-quality approximations of FIs and ARs are sufficient for most practical uses. Sampling techniques can be used for fast discovery of approximate solutions, but works exploring this technique did not provide satisfactory performance guarantees on the quality of the approximation due to the difficulty of bounding the probability of under- or oversampling any one of an unknown number of frequent itemsets. 
We circumvent this issue by applying the statistical concept of Vapnik--Chervonenkis (VC) dimension to develop a novel technique for providing tight bounds on the sample size that guarantees approximation of the (top-$K$) FIs and ARs within user-specified parameters. The resulting sample size is linearly dependent on the VC-dimension of a range space associated with the dataset. We analyze the VC-dimension of this range space and show that it is upper bounded by an easy-to-compute characteristic quantity of the dataset, the d-index, namely, the maximum integer d such that the dataset contains at least d transactions of length at least d such that no one of them is a superset of or equal to another. We show that this bound is tight for a large class of datasets. The resulting sample size is a significant improvement over previous known results. We present an extensive experimental evaluation of our technique on real and artificial datasets, demonstrating the practicality of our methods, and showing that they achieve even higher quality approximations than what is guaranteed by the analysis.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "20", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Burton:2014:DSC, author = "Scott H. Burton and Christophe G. 
Giraud-Carrier", title = "Discovering Social Circles in Directed Graphs", journal = j-TKDD, volume = "8", number = "4", pages = "21:1--21:??", month = aug, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2641759", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Aug 26 17:49:02 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "We examine the problem of identifying social circles, or sets of cohesive and mutually aware nodes surrounding an initial query set, in directed graphs where the complete graph is not known beforehand. This problem differs from local community mining, in that the query set defines the circle of interest. We explicitly handle edge direction, as in many cases relationships are not symmetric, and focus on the local context because many real-world graphs cannot be feasibly known. We outline several issues that are unique to this context, introduce a quality function to measure the value of including a particular node in an emerging social circle, and describe a greedy social circle discovery algorithm. We demonstrate the effectiveness of this approach on artificial benchmarks, large networks with topical community labels, and several real-world case studies.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "21", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Paul:2014:RPL, author = "Saurabh Paul and Christos Boutsidis and Malik Magdon-Ismail and Petros Drineas", title = "Random Projections for Linear Support Vector Machines", journal = j-TKDD, volume = "8", number = "4", pages = "22:1--22:??", month = oct, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2641760", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Oct 7 18:45:26 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Let $X$ be a data matrix of rank $ \rho $, whose rows represent $n$ points in $d$-dimensional space. The linear support vector machine constructs a hyperplane separator that maximizes the 1-norm soft margin. We develop a new oblivious dimension reduction technique that is precomputed and can be applied to any input matrix $X$. We prove that, with high probability, the margin and minimum enclosing ball in the feature space are preserved to within $ \epsilon $-relative error, ensuring comparable generalization as in the original space in the case of classification. For regression, we show that the margin is preserved to $ \epsilon $-relative error with high probability. We present extensive experiments with real and synthetic data to support our theory.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "22", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Erdo:2014:RGN, author = "D{\'o}ra Erd{\H{o}}s and Rainer Gemulla and Evimaria Terzi", title = "Reconstructing Graphs from Neighborhood Data", journal = j-TKDD, volume = "8", number = "4", pages = "23:1--23:??", month = aug, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2641761", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Aug 26 17:49:02 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Consider a social network and suppose that we are only given the number of common friends between each pair of users. Can we reconstruct the underlying network? Similarly, consider a set of documents and the words that appear in them. If we only know the number of common words for every pair of documents, as well as the number of common documents for every pair of words, can we infer which words appear in which documents? In this article, we develop a general methodology for answering questions like these. We formalize these questions in what we call the {\em R}econstruct problem: given information about the common neighbors of nodes in a network, our goal is to reconstruct the hidden binary matrix that indicates the presence or absence of relationships between individual nodes. In fact, we propose two different variants of this problem: one where the number of connections of every node (i.e., the degree of every node) is known and a second one where it is unknown. We call these variants the degree-aware and the degree-oblivious versions of the Reconstruct problem, respectively. Our algorithms for both variants exploit the properties of the singular value decomposition of the hidden binary matrix. 
More specifically, we show that using the available neighborhood information, we can reconstruct the hidden matrix by finding the components of its singular value decomposition and then combining them appropriately. Our extensive experimental study suggests that our methods are able to reconstruct binary matrices of different characteristics with up to 100\% accuracy.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "23", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Acharya:2014:OFC, author = "Ayan Acharya and Eduardo R. Hruschka and Joydeep Ghosh and Sreangsu Acharyya", title = "An Optimization Framework for Combining Ensembles of Classifiers and Clusterers with Applications to Nontransductive Semisupervised Learning and Transfer Learning", journal = j-TKDD, volume = "9", number = "1", pages = "1:1--1:??", month = aug, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2601435", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Aug 26 17:49:05 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Unsupervised models can provide supplementary soft constraints to help classify new ``target'' data because similar instances in the target set are more likely to share the same class label. Such models can also help detect possible differences between training and target distributions, which is useful in applications where concept drift may take place, as in transfer learning settings. 
This article describes a general optimization framework that takes as input class membership estimates from existing classifiers learned on previously encountered ``source'' (or training) data, as well as a similarity matrix from a cluster ensemble operating solely on the target (or test) data to be classified, and yields a consensus labeling of the target data. More precisely, the application settings considered are nontransductive semisupervised and transfer learning scenarios where the training data are used only to build an ensemble of classifiers and are subsequently discarded before classifying the target data. The framework admits a wide range of loss functions and classification/clustering methods. It exploits properties of Bregman divergences in conjunction with Legendre duality to yield a principled and scalable approach. A variety of experiments show that the proposed framework can yield results substantially superior to those provided by na{\"\i}vely applying classifiers learned on the original task to the target data. In addition, we show that the proposed approach, even not being conceptually transductive, can provide better results compared to some popular transductive learning techniques.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "1", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Boedihardjo:2014:FEL, author = "Arnold P. 
Boedihardjo and Chang-Tien Lu and Bingsheng Wang", title = "A Framework for Exploiting Local Information to Enhance Density Estimation of Data Streams", journal = j-TKDD, volume = "9", number = "1", pages = "2:1--2:??", month = aug, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2629618", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Aug 26 17:49:05 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "The Probability Density Function (PDF) is the fundamental data model for a variety of stream mining algorithms. Existing works apply the standard nonparametric Kernel Density Estimator (KDE) to approximate the PDF of data streams. As a result, the stream-based KDEs cannot accurately capture complex local density features. In this article, we propose the use of Local Region (LRs) to model local density information in univariate data streams. In-depth theoretical analyses are presented to justify the effectiveness of the LR-based KDE. Based on the analyses, we develop the General Local rEgion AlgorithM (GLEAM) to enhance the estimation quality of structurally complex univariate distributions for existing stream-based KDEs. A set of algorithmic optimizations is designed to improve the query throughput of GLEAM and to achieve its linear order computation. Additionally, a comprehensive suite of experiments was conducted to test the effectiveness and efficiency of GLEAM.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "2", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ordonez:2014:BVS, author = "Carlos Ordonez and Carlos Garcia-Alvarado and Veerabhadaran Baladandayuthapani", title = "{Bayesian} Variable Selection in Linear Regression in One Pass for Large Datasets", journal = j-TKDD, volume = "9", number = "1", pages = "3:1--3:??", month = aug, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2629617", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Aug 26 17:49:05 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Bayesian models are generally computed with Markov Chain Monte Carlo (MCMC) methods. The main disadvantage of MCMC methods is the large number of iterations they need to sample the posterior distributions of model parameters, especially for large datasets. On the other hand, variable selection remains a challenging problem due to its combinatorial search space, where Bayesian models are a promising solution. In this work, we study how to accelerate Bayesian model computation for variable selection in linear regression. We propose a fast Gibbs sampler algorithm, a widely used MCMC method that incorporates several optimizations. We use a Zellner prior for the regression coefficients, an improper prior on variance, and a conjugate prior Gaussian distribution, which enable dataset summarization in one pass, thus exploiting an augmented set of sufficient statistics. Thereafter, the algorithm iterates in main memory. Sufficient statistics are indexed with a sparse binary vector to efficiently compute matrix projections based on selected variables. Discovered variable subsets probabilities, selecting and discarding each variable, are stored on a hash table for fast retrieval in future iterations. 
We study how to integrate our algorithm into a Database Management System (DBMS), exploiting aggregate User-Defined Functions for parallel data summarization and stored procedures to manipulate matrices with arrays. An experimental evaluation with real datasets evaluates accuracy and time performance, comparing our DBMS-based algorithm with the R package. Our algorithm is shown to produce accurate results, scale linearly on dataset size, and run orders of magnitude faster than the R package.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "3", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Fei:2014:SSB, author = "Hongliang Fei and Jun Huan", title = "Structured Sparse Boosting for Graph Classification", journal = j-TKDD, volume = "9", number = "1", pages = "4:1--4:??", month = aug, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2629328", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Aug 26 17:49:05 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Boosting is a highly effective algorithm that produces a linear combination of weak classifiers (a.k.a. base learners) to obtain high-quality classification models. In this article, we propose a generalized logit boost algorithm in which base learners have structural relationships in the functional space. Although such relationships are generic, our work is particularly motivated by the emerging topic of pattern-based classification for semistructured data including graphs. Toward an efficient incorporation of the structure information, we have designed a general model in which we use an undirected graph to capture the relationship of subgraph-based base learners. 
In our method, we employ both L$_1$ and Laplacian-based L$_2$ regularization to logit boosting to achieve model sparsity and smoothness in the functional space spanned by the base learners. We have derived efficient optimization algorithms based on coordinate descent for the new boosting formulation and theoretically prove that it exhibits a natural grouping effect for nearby spatial or overlapping base learners and that the resulting estimator is consistent. Additionally, motivated by the connection between logit boosting and logistic regression, we extend our structured sparse regularization framework to logistic regression for vectorial data in which features are structured. Using comprehensive experimental study and comparing our work with the state-of-the-art, we have demonstrated the effectiveness of the proposed learning method.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "4", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Xu:2014:GGB, author = "Zhiqiang Xu and Yiping Ke and Yi Wang and Hong Cheng and James Cheng", title = "{GBAGC}: a General {Bayesian} Framework for Attributed Graph Clustering", journal = j-TKDD, volume = "9", number = "1", pages = "5:1--5:??", month = aug, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2629616", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Aug 26 17:49:05 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Graph clustering, also known as community detection, is a long-standing problem in data mining. In recent years, with the proliferation of rich attribute information available for objects in real-world graphs, how to leverage not only structural but also attribute information for clustering attributed graphs becomes a new challenge. 
Most existing works took a distance-based approach. They proposed various distance measures to fuse structural and attribute information and then applied standard techniques for graph clustering based on these distance measures. In this article, we take an alternative view and propose a novel Bayesian framework for attributed graph clustering. Our framework provides a general and principled solution to modeling both the structural and the attribute aspects of a graph. It avoids the artificial design of a distance measure in existing methods and, furthermore, can seamlessly handle graphs with different types of edges and vertex attributes. We develop an efficient variational method for graph clustering under this framework and derive two concrete algorithms for clustering unweighted and weighted attributed graphs. Experimental results on large real-world datasets show that our algorithms significantly outperform the state-of-the-art distance-based method, in terms of both effectiveness and efficiency.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "5", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Coscia:2014:UHO, author = "Michele Coscia and Giulio Rossetti and Fosca Giannotti and Dino Pedreschi", title = "Uncovering Hierarchical and Overlapping Communities with a Local-First Approach", journal = j-TKDD, volume = "9", number = "1", pages = "6:1--6:??", month = aug, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2629511", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Aug 26 17:49:05 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Community discovery in complex networks is the task of organizing a network's structure by grouping together nodes related to each other. 
Traditional approaches are based on the assumption that there is a global-level organization in the network. However, in many scenarios, each node is the bearer of complex information and cannot be classified in disjoint clusters. The top-down global view of the partition approach is not designed for this. Here, we represent this complex information as multiple latent labels, and we postulate that edges in the networks are created among nodes carrying similar labels. The latent labels are the communities a node belongs to and we discover them with a simple local-first approach to community discovery. This is achieved by democratically letting each node vote for the communities it sees surrounding it in its limited view of the global system, its ego neighborhood, using a label propagation algorithm, assuming that each node is aware of the label it shares with each of its connections. The local communities are merged hierarchically, unveiling the modular organization of the network at the global level and identifying overlapping groups and groups of groups. We tested this intuition against the state-of-the-art overlapping community discovery and found that our new method advances in the chosen scenarios in the quality of the obtained communities. We perform a test on benchmark and on real-world networks, evaluating the quality of the community coverage by using the extracted communities to predict the metadata attached to the nodes, which we consider external information about the latent labels. We also provide an explanation about why real-world networks contain overlapping communities and how our logic is able to capture them. Finally, we show how our method is deterministic, is incremental, and has a limited time complexity, so that it can be used on real-world scale networks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "6", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2014:GML, author = "Guangtao Wang and Qinbao Song and Xueying Zhang and Kaiyuan Zhang", title = "A Generic Multilabel Learning-Based Classification Algorithm Recommendation Method", journal = j-TKDD, volume = "9", number = "1", pages = "7:1--7:??", month = oct, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2629474", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Oct 10 17:19:10 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "As more and more classification algorithms continue to be developed, recommending appropriate algorithms to a given classification problem is increasingly important. This article first distinguishes the algorithm recommendation methods by two dimensions: (1) meta-features, which are a set of measures used to characterize the learning problems, and (2) meta-target, which represents the relative performance of the classification algorithms on the learning problem. In contrast to the existing algorithm recommendation methods whose meta-target is usually in the form of either the ranking of candidate algorithms or a single algorithm, this article proposes a new and natural multilabel form to describe the meta-target. This is due to the fact that there would be multiple algorithms being appropriate for a given problem in practice. Furthermore, a novel multilabel learning-based generic algorithm recommendation method is proposed, which views the algorithm recommendation as a multilabel learning problem and solves the problem by the mature multilabel learning algorithms. 
To evaluate the proposed multilabel learning-based recommendation method, extensive experiments with 13 well-known classification algorithms, two kinds of meta-targets such as algorithm ranking and single algorithm, and five different kinds of meta-features are conducted on 1,090 benchmark learning problems. The results show the effectiveness of our proposed multilabel learning-based recommendation method.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "7", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2014:EEM, author = "Pinghui Wang and John C. S. Lui and Bruno Ribeiro and Don Towsley and Junzhou Zhao and Xiaohong Guan", title = "Efficiently Estimating Motif Statistics of Large Networks", journal = j-TKDD, volume = "9", number = "2", pages = "8:1--8:??", month = sep, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2629564", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Oct 7 18:49:26 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Exploring statistics of locally connected subgraph patterns (also known as network motifs) has helped researchers better understand the structure and function of biological and Online Social Networks (OSNs). Nowadays, the massive size of some critical networks---often stored in already overloaded relational databases---effectively limits the rate at which nodes and edges can be explored, making it a challenge to accurately discover subgraph statistics. In this work, we propose sampling methods to accurately estimate subgraph statistics from as few queried nodes as possible. We present sampling algorithms that efficiently and accurately estimate subgraph properties of massive networks. Our algorithms require no precomputation or complete network topology information. 
At the same time, we provide theoretical guarantees of convergence. We perform experiments using widely known datasets and show that, for the same accuracy, our algorithms require an order of magnitude less queries (samples) than the current state-of-the-art algorithms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "8", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zheng:2014:FHE, author = "Li Zheng and Tao Li and Chris Ding", title = "A Framework for Hierarchical Ensemble Clustering", journal = j-TKDD, volume = "9", number = "2", pages = "9:1--9:??", month = sep, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2611380", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Oct 7 18:49:26 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Ensemble clustering, as an important extension of the clustering problem, refers to the problem of combining different (input) clusterings of a given dataset to generate a final (consensus) clustering that is a better fit in some sense than existing clusterings. Over the past few years, many ensemble clustering approaches have been developed. However, most of them are designed for partitional clustering methods, and few research efforts have been reported for ensemble hierarchical clustering methods. In this article, a hierarchical ensemble clustering framework that can naturally combine both partitional clustering and hierarchical clustering results is proposed. In addition, a novel method for learning the ultra-metric distance from the aggregated distance matrices and generating final hierarchical clustering with enhanced cluster separation is developed based on the ultra-metric distance for hierarchical clustering. 
We study three important problems: dendrogram description, dendrogram combination, and dendrogram selection. We develop two approaches for dendrogram selection based on tree distances, and we investigate various dendrogram distances for representing dendrograms. We provide a systematic empirical study of the ensemble hierarchical clustering problem. Experimental results demonstrate the effectiveness of our proposed approaches.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "9", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Huai:2014:TPC, author = "Baoxing Huai and Enhong Chen and Hengshu Zhu and Hui Xiong and Tengfei Bao and Qi Liu and Jilei Tian", title = "Toward Personalized Context Recognition for Mobile Users: a Semisupervised {Bayesian} {HMM} Approach", journal = j-TKDD, volume = "9", number = "2", pages = "10:1--10:??", month = sep, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2629504", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Oct 7 18:49:26 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "The problem of mobile context recognition targets the identification of semantic meaning of context in a mobile environment. This plays an important role in understanding mobile user behaviors and thus provides the opportunity for the development of better intelligent context-aware services. A key step of context recognition is to model the personalized contextual information of mobile users. Although many studies have been devoted to mobile context modeling, limited efforts have been made on the exploitation of the sequential and dependency characteristics of mobile contextual information. Also, the latent semantics behind mobile context are often ambiguous and poorly understood. 
Indeed, a promising direction is to incorporate some domain knowledge of common contexts, such as ``waiting for a bus'' or ``having dinner,'' by modeling both labeled and unlabeled context data from mobile users because there are often few labeled contexts available in practice. To this end, in this article, we propose a sequence-based semisupervised approach to modeling personalized context for mobile users. Specifically, we first exploit the Bayesian Hidden Markov Model (B-HMM) for modeling context in the form of probabilistic distributions and transitions of raw context data. Also, we propose a sequential model by extending B-HMM with the prior knowledge of contextual features to model context more accurately. Then, to efficiently learn the parameters and initial values of the proposed models, we develop a novel approach for parameter estimation by integrating the Dirichlet Process Mixture (DPM) model and the Mixture Unigram (MU) model. Furthermore, by incorporating both user-labeled and unlabeled data, we propose a semisupervised learning-based algorithm to identify and model the latent semantics of context. Finally, experimental results on real-world data clearly validate both the efficiency and effectiveness of the proposed approaches for recognizing personalized context of mobile users.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "10", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2014:ADI, author = "Siyuan Liu and Lei Chen and Lionel M. 
Ni", title = "Anomaly Detection from Incomplete Data", journal = j-TKDD, volume = "9", number = "2", pages = "11:1--11:??", month = sep, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2629668", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Oct 7 18:49:26 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Anomaly detection (a.k.a., outlier or burst detection) is a well-motivated problem and a major data mining and knowledge discovery task. In this article, we study the problem of population anomaly detection, one of the key issues related to event monitoring and population management within a city. Through studying detected population anomalies, we can trace and analyze these anomalies, which could help to model city traffic design and event impact analysis and prediction. Although a significant and interesting issue, it is very hard to detect population anomalies and retrieve anomaly trajectories, especially given that it is difficult to get actual and sufficient population data. To address the difficulties of a lack of real population data, we take advantage of mobile phone networks, which offer enormous spatial and temporal communication data on persons. More importantly, we claim that we can utilize these mobile phone data to infer and approximate population data. Thus, we can study the population anomaly detection problem by taking advantages of unique features hidden in mobile phone data. In this article, we present a system to conduct Population Anomaly Detection (PAD). First, we propose an effective clustering method, correlation-based clustering, to cluster the incomplete location information from mobile phone data (i.e., from mobile call volume distribution to population density distribution). Then, we design an adaptive parameter-free detection method, R-scan, to capture the distributed dynamic anomalies. 
Finally, we devise an efficient algorithm, BT-miner, to retrieve anomaly trajectories. The experimental results from real-life mobile phone data confirm the effectiveness and efficiency of the proposed algorithms. Finally, the proposed methods are realized as a pilot system in a city in China.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "11", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Gundecha:2014:UVR, author = "Pritam Gundecha and Geoffrey Barbier and Jiliang Tang and Huan Liu", title = "User Vulnerability and Its Reduction on a Social Networking Site", journal = j-TKDD, volume = "9", number = "2", pages = "12:1--12:??", month = sep, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2630421", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Oct 7 18:49:26 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Privacy and security are major concerns for many users of social media. When users share information (e.g., data and photos) with friends, they can make their friends vulnerable to security and privacy breaches with dire consequences. With the continuous expansion of a user's social network, privacy settings alone are often inadequate to protect a user's profile. In this research, we aim to address some critical issues related to privacy protection: (1) How can we measure and assess individual users' vulnerability? (2) With the diversity of one's social network friends, how can one figure out an effective approach to maintaining balance between vulnerability and social utility? In this work, first we present a novel way to define vulnerable friends from an individual user's perspective. 
User vulnerability is dependent on whether or not the user's friends' privacy settings protect the friend and the individual's network of friends (which includes the user). We show that it is feasible to measure and assess user vulnerability and reduce one's vulnerability without changing the structure of a social networking site. The approach is to unfriend one's most vulnerable friends. However, when such a vulnerable friend is also socially important, unfriending him or her would significantly reduce one's own social status. We formulate this novel problem as vulnerability minimization with social utility constraints. We formally define the optimization problem and provide an approximation algorithm with a proven bound. Finally, we conduct a large-scale evaluation of a new framework using a Facebook dataset. We resort to experiments and observe how much vulnerability an individual user can be decreased by unfriending a vulnerable friend. We compare performance of different unfriending strategies and discuss the security risk of new friend requests. Additionally, by employing different forms of social utility, we confirm that the balance between user vulnerability and social utility can be practically achieved.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "12", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Duan:2014:SRC, author = "Lian Duan and W. 
Nick Street and Yanchi Liu and Songhua Xu and Brook Wu", title = "Selecting the Right Correlation Measure for Binary Data", journal = j-TKDD, volume = "9", number = "2", pages = "13:1--13:??", month = sep, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2637484", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Oct 7 18:49:26 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Finding the most interesting correlations among items is essential for problems in many commercial, medical, and scientific domains. Although there are numerous measures available for evaluating correlations, different correlation measures provide drastically different results. Piatetsky-Shapiro provided three mandatory properties for any reasonable correlation measure, and Tan et al. proposed several properties to categorize correlation measures; however, it is still hard for users to choose the desirable correlation measures according to their needs. In order to solve this problem, we explore the effectiveness problem in three ways. First, we propose two desirable properties and two optional properties for correlation measure selection and study the property satisfaction for different correlation measures. Second, we study different techniques to adjust correlation measures and propose two new correlation measures: the Simplified $ \chi^2 $ with Continuity Correction and the Simplified $ \chi^2 $ with Support. Third, we analyze the upper and lower bounds of different measures and categorize them by the bound differences. Combining these three directions, we provide guidelines for users to choose the proper measure according to their needs.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "13", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Huang:2014:PBA, author = "Hao Huang and Hong Qin and Shinjae Yoo and Dantong Yu", title = "Physics-Based Anomaly Detection Defined on Manifold Space", journal = j-TKDD, volume = "9", number = "2", pages = "14:1--14:??", month = sep, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2641574", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Oct 7 18:49:26 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Current popular anomaly detection algorithms are capable of detecting global anomalies but often fail to distinguish local anomalies from normal instances. Inspired by contemporary physics theory (i.e., heat diffusion and quantum mechanics), we propose two unsupervised anomaly detection algorithms. Building on the embedding manifold derived from heat diffusion, we devise Local Anomaly Descriptor (LAD), which faithfully reveals the intrinsic neighborhood density. It uses a scale-dependent umbrella operator to bridge global and local properties, which makes LAD more informative within an adaptive scope of neighborhood. To offer more stability of local density measurement on scaling parameter tuning, we formulate Fermi Density Descriptor (FDD), which measures the probability of a fermion particle being at a specific location. By choosing the stable energy distribution function, FDD steadily distinguishes anomalies from normal instances with any scaling parameter setting. To further enhance the efficacy of our proposed algorithms, we explore the utility of anisotropic Gaussian kernel (AGK), which offers better manifold-aware affinity information. We also quantify and examine the effect of different Laplacian normalizations for anomaly detection. 
Comprehensive experiments on both synthetic and benchmark datasets verify that our proposed algorithms outperform the existing anomaly detection algorithms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "14", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Gionis:2015:ISI, author = "Aristides Gionis and Hang Li", title = "Introduction to the Special Issue {ACM SIGKDD} 2013", journal = j-TKDD, volume = "9", number = "3", pages = "15e:1--15e:??", month = apr, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700993", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 14 09:22:28 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "15e", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Jha:2015:SES, author = "Madhav Jha and C. Seshadhri and Ali Pinar", title = "A Space-Efficient Streaming Algorithm for Estimating Transitivity and Triangle Counts Using the Birthday Paradox", journal = j-TKDD, volume = "9", number = "3", pages = "15:1--15:??", month = feb, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700395", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Mar 6 09:34:37 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "We design a space-efficient algorithm that approximates the transitivity (global clustering coefficient) and total triangle count with only a single pass through a graph given as a stream of edges. Our procedure is based on the classic probabilistic result, the birthday paradox. 
When the transitivity is constant and there are more edges than wedges (common properties for social networks), we can prove that our algorithm requires $ O(\sqrt n) $ space ($n$ is the number of vertices) to provide accurate estimates. We run a detailed set of experiments on a variety of real graphs and demonstrate that the memory requirement of the algorithm is a tiny fraction of the graph. For example, even for a graph with 200 million edges, our algorithm stores just 40,000 edges to give accurate results. Being a single pass streaming algorithm, our procedure also maintains a real-time estimate of the transitivity/number of triangles of a graph by storing a minuscule fraction of edges.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "15", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Tang:2015:FMT, author = "Lu-An Tang and Xiao Yu and Quanquan Gu and Jiawei Han and Guofei Jiang and Alice Leung and Thomas {La Porta}", title = "A Framework of Mining Trajectories from Untrustworthy Data in Cyber-Physical System", journal = j-TKDD, volume = "9", number = "3", pages = "16:1--16:??", month = feb, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700394", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Mar 6 09:34:37 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "A cyber-physical system (CPS) integrates physical (i.e., sensor) devices with cyber (i.e., informational) components to form a context-sensitive system that responds intelligently to dynamic changes in real-world situations. The CPS has wide applications in scenarios such as environment monitoring, battlefield surveillance, and traffic control. One key research problem of CPS is called mining lines in the sand. 
With a large number of sensors (sand) deployed in a designated area, the CPS is required to discover all trajectories (lines) of passing intruders in real time. There are two crucial challenges that need to be addressed: (1) the collected sensor data are not trustworthy, and (2) the intruders do not send out any identification information. The system needs to distinguish multiple intruders and track their movements. This study proposes a method called LiSM (Line-in-the-Sand Miner) to discover trajectories from untrustworthy sensor data. LiSM constructs a watching network from sensor data and computes the locations of intruder appearances based on the link information of the network. The system retrieves a cone model from the historical trajectories to track multiple intruders. Finally, the system validates the mining results and updates sensors' reliability scores in a feedback process. In addition, LoRM (Line-on-the-Road Miner) is proposed for trajectory discovery on road networks---mining lines on the roads. LoRM employs a filtering-and-refinement framework to reduce the distance computational overhead on road networks and uses a shortest-path-measure to track intruders. The proposed methods are evaluated with extensive experiments on big datasets. The experimental results show that the proposed methods achieve higher accuracy and efficiency in trajectory mining tasks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "16", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2015:QDR, author = "Zheng Wang and Jieping Ye", title = "Querying Discriminative and Representative Samples for Batch Mode Active Learning", journal = j-TKDD, volume = "9", number = "3", pages = "17:1--17:??", month = feb, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700408", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Mar 6 09:34:37 MST 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Empirical risk minimization (ERM) provides a principled guideline for many machine learning and data mining algorithms. Under the ERM principle, one minimizes an upper bound of the true risk, which is approximated by the summation of empirical risk and the complexity of the candidate classifier class. To guarantee a satisfactory learning performance, ERM requires that the training data are i.i.d. sampled from the unknown source distribution. However, this may not be the case in active learning, where one selects the most informative samples to label, and these data may not follow the source distribution. In this article, we generalize the ERM principle to the active learning setting. We derive a novel form of upper bound for the true risk in the active learning setting; by minimizing this upper bound, we develop a practical batch mode active learning method. The proposed formulation involves a nonconvex integer programming optimization problem. We solve it efficiently by an alternating optimization method. Our method is shown to query the most informative samples while preserving the source distribution as much as possible, thus identifying the most uncertain and representative queries. 
We further extend our method to multiclass active learning by introducing novel pseudolabels in the multiclass case and developing an efficient algorithm. Experiments on benchmark datasets and real-world applications demonstrate the superior performance of our proposed method compared to state-of-the-art methods.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "17", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Gopal:2015:HBI, author = "Siddharth Gopal and Yiming Yang", title = "Hierarchical {Bayesian} Inference and Recursive Regularization for Large-Scale Classification", journal = j-TKDD, volume = "9", number = "3", pages = "18:1--18:??", month = apr, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2629585", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 14 09:22:28 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "In this article, we address open challenges in large-scale classification, focusing on how to effectively leverage the dependency structures (hierarchical or graphical) among class labels, and how to make the inference scalable in jointly optimizing all model parameters. We propose two main approaches, namely the hierarchical Bayesian inference framework and the recursive regularization scheme. The key idea in both approaches is to reinforce the similarity among parameter across the nodes in a hierarchy or network based on the proximity and connectivity of the nodes. For scalability, we develop hierarchical variational inference algorithms and fast dual coordinate descent training procedures with parallelization. 
In our experiments for classification problems with hundreds of thousands of classes and millions of training instances with terabytes of parameters, the proposed methods show consistent and statistically significant improvements over other competing approaches, and the best results on multiple benchmark datasets for large-scale classification.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "18", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yin:2015:MLB, author = "Hongzhi Yin and Bin Cui and Ling Chen and Zhiting Hu and Chengqi Zhang", title = "Modeling Location-Based User Rating Profiles for Personalized Recommendation", journal = j-TKDD, volume = "9", number = "3", pages = "19:1--19:??", month = apr, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2663356", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 14 09:22:28 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "This article proposes LA-LDA, a location-aware probabilistic generative model that exploits location-based ratings to model user profiles and produce recommendations. Most of the existing recommendation models do not consider the spatial information of users or items; however, LA-LDA supports three classes of location-based ratings, namely spatial user ratings for nonspatial items, nonspatial user ratings for spatial items, and spatial user ratings for spatial items. LA-LDA consists of two components, ULA-LDA and ILA-LDA, which are designed to take into account user and item location information, respectively. 
The component ULA-LDA explicitly incorporates and quantifies the influence from local public preferences to produce recommendations by considering user home locations, whereas the component ILA-LDA recommends items that are closer in both taste and travel distance to the querying users by capturing item co-occurrence patterns, as well as item location co-occurrence patterns. The two components of LA-LDA can be applied either separately or collectively, depending on the available types of location-based ratings. To demonstrate the applicability and flexibility of the LA-LDA model, we deploy it to both top-$k$ recommendation and cold start recommendation scenarios. Experimental evidence on large-scale real-world data, including the data from Gowalla (a location-based social network), DoubanEvent (an event-based social network), and MovieLens (a movie recommendation system), reveal that LA-LDA models user profiles more accurately by outperforming existing recommendation models for top-$k$ recommendation and the cold start problem.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "19", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Hu:2015:PSD, author = "Juhua Hu and De-Chuan Zhan and Xintao Wu and Yuan Jiang and Zhi-Hua Zhou", title = "Pairwised Specific Distance Learning from Physical Linkages", journal = j-TKDD, volume = "9", number = "3", pages = "20:1--20:??", month = apr, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700405", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 14 09:22:28 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "In real tasks, usually a good classification performance can only be obtained when a good distance metric is obtained; therefore, distance metric learning has attracted significant attention in the past few years. Typical studies of distance metric learning evaluate how to construct an appropriate distance metric that is able to separate training data points from different classes or satisfy a set of constraints (e.g., must-links and/or cannot-links). It is noteworthy that this task becomes challenging when there are only limited labeled training data points and no constraints are given explicitly. Moreover, most existing approaches aim to construct a global distance metric that is applicable to all data points. However, different data points may have different properties and may require different distance metrics. We notice that data points in real tasks are often connected by physical links (e.g., people are linked with each other in social networks; personal webpages are often connected to other webpages, including nonpersonal webpages), but the linkage information has not been exploited in distance metric learning. 
In this article, we develop a pairwised specific distance (PSD) approach that exploits the structures of physical linkages and in particular captures the key observations that nonmetric and clique linkages imply the appearance of different or unique semantics, respectively. It is noteworthy that, rather than generating a global distance, PSD generates different distances for different pairs of data points; this property is desired in applications involving complicated data semantics. We mainly present PSD for multi-class learning and further extend it to multi-label learning. Experimental results validate the effectiveness of PSD, especially in the scenarios in which there are very limited labeled training data points and no explicit constraints are given.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "20", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Soundarajan:2015:ULG, author = "Sucheta Soundarajan and John E. Hopcroft", title = "Use of Local Group Information to Identify Communities in Networks", journal = j-TKDD, volume = "9", number = "3", pages = "21:1--21:??", month = apr, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700404", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 14 09:22:28 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "The recent interest in networks has inspired a broad range of work on algorithms and techniques to characterize, identify, and extract communities from networks. Such efforts are complicated by a lack of consensus on what a ``community'' truly is, and these disagreements have led to a wide variety of mathematical formulations for describing communities. 
Often, these mathematical formulations, such as modularity and conductance, have been founded in the general principle that communities, like a $ G(n, p) $ graph, are ``round,'' with connections throughout the entire community, and so algorithms were developed to optimize such mathematical measures. More recently, a variety of algorithms have been developed that, rather than expecting connectivity through the entire community, seek out very small groups of well-connected nodes and then connect these groups into larger communities. In this article, we examine seven real networks, each containing external annotation that allows us to identify ``annotated communities.'' A study of these annotated communities gives insight into why the second category of community detection algorithms may be more successful than the first category. We then present a flexible algorithm template that is based on the idea of joining together small sets of nodes. In this template, we first identify very small, tightly connected ``subcommunities'' of nodes, each corresponding to a single node's ``perception'' of the network around it. We then create a new network in which each node represents such a subcommunity, and then identify communities in this new network. Because each node can appear in multiple subcommunities, this method allows us to detect overlapping communities. When evaluated on real data, we show that our template outperforms many other state-of-the-art algorithms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "21", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2015:UCN, author = "Pinghui Wang and Junzhou Zhao and John C. S. 
Lui and Don Towsley and Xiaohong Guan", title = "Unbiased Characterization of Node Pairs over Large Graphs", journal = j-TKDD, volume = "9", number = "3", pages = "22:1--22:??", month = apr, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700393", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 14 09:22:28 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Characterizing user pair relationships is important for applications such as friend recommendation and interest targeting in online social networks (OSNs). Due to the large-scale nature of such networks, it is infeasible to enumerate all user pairs and thus sampling is used. In this article, we show that it is a great challenge for OSN service providers to characterize user pair relationships, even when they possess the complete graph topology. The reason is that when sampling techniques (i.e., uniform vertex sampling (UVS) and random walk (RW)) are naively applied, they can introduce large biases, particularly for estimating similarity distribution of user pairs with constraints like existence of mutual neighbors, which is important for applications such as identifying network homophily. Estimating statistics of user pairs is more challenging in the absence of the complete topology information, as an unbiased sampling technique like UVS is usually not allowed and exploring the OSN graph topology is expensive. To address these challenges, we present unbiased sampling methods to characterize user pair properties based on UVS and RW techniques. We carry out an evaluation of our methods to show their accuracy and efficiency. Finally, we apply our methods to three OSNs---Foursquare, Douban, and Xiami---and discover that significant homophily is present in these networks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov.
Data", articleno = "22", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Vlachos:2015:DPC, author = "Michail Vlachos and Johannes Schneider and Vassilios G. Vassiliadis", title = "On Data Publishing with Clustering Preservation", journal = j-TKDD, volume = "9", number = "3", pages = "23:1--23:??", month = apr, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700403", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 14 09:22:28 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "The emergence of cloud-based storage services is opening up new avenues in data exchange and data dissemination. This has amplified the interest in right-protection mechanisms to establish ownership in the event of data leakage. Current right-protection technologies, however, rarely provide strong guarantees on dataset utility after the protection process. This work presents techniques that explicitly address this topic and provably preserve the outcome of certain mining operations. In particular, we take special care to guarantee that the outcome of hierarchical clustering operations remains the same before and after right protection. Our approach considers all prevalent hierarchical clustering variants: single-, complete-, and average-linkage. We imprint the ownership in a dataset using watermarking principles, and we derive tight bounds on the expansion/contraction of distances incurred by the process. We leverage our analysis to design fast algorithms for right protection without exhaustively searching the vast design space. Finally, because the right-protection process introduces a user-tunable distortion on the dataset, we explore the possibility of using this mechanism for data obfuscation. 
We quantify the tradeoff between obfuscation and utility for spatiotemporal datasets and discover very favorable characteristics of the process. An additional advantage is that when one is interested in both right-protecting and obfuscating the original data values, the proposed mechanism can accomplish both tasks simultaneously.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "23", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{VazDeMelo:2015:UDP, author = "Pedro O. S. {Vaz De Melo} and Christos Faloutsos and Renato Assun{\c{c}}{\~a}o and Rodrigo Alves and Antonio A. F. Loureiro", title = "Universal and Distinct Properties of Communication Dynamics: How to Generate Realistic Inter-event Times", journal = j-TKDD, volume = "9", number = "3", pages = "24:1--24:??", month = apr, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700399", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 14 09:22:28 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "With the advancement of information systems, means of communications are becoming cheaper, faster, and more available. Today, millions of people carrying smartphones or tablets are able to communicate practically any time and anywhere they want. They can access their e-mails, comment on weblogs, watch and post videos and photos (as well as comment on them), and make phone calls or text messages almost ubiquitously. Given this scenario, in this article, we tackle a fundamental aspect of this new era of communication: How the time intervals between communication events behave for different technologies and means of communications. Are there universal patterns for the Inter-Event Time Distribution (IED)? 
How do inter-event times behave differently among particular technologies? To answer these questions, we analyzed eight different datasets from real and modern communication data and found four well-defined patterns seen in all the eight datasets. Moreover, we propose the use of the Self-Feeding Process (SFP) to generate inter-event times between communications. The SFP is an extremely parsimonious point process that requires at most two parameters and is able to generate inter-event times with all the universal properties we observed in the data. We also show three potential applications of the SFP: as a framework to generate a synthetic dataset containing realistic communication events of any one of the analyzed means of communications, as a technique to detect anomalies, and as a building block for more specific models that aim to encompass the particularities seen in each of the analyzed systems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "24", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2015:WIY, author = "Jing Zhang and Jie Tang and Juanzi Li and Yang Liu and Chunxiao Xing", title = "Who Influenced You? {Predicting} Retweet via Social Influence Locality", journal = j-TKDD, volume = "9", number = "3", pages = "25:1--25:??", month = apr, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700398", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 14 09:22:28 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Social influence occurs when one's opinions, emotions, or behaviors are affected by others in a social network. However, social influence takes many forms, and its underlying mechanism is still unclear. 
For example, how is one's behavior influenced by a group of friends who know each other and by the friends from different ego friend circles? In this article, we study the social influence problem in a large microblogging network. Particularly, we consider users' (re)tweet behaviors and focus on investigating how friends in one's ego network influence retweet behaviors. We propose a novel notion of social influence locality and develop two instantiation functions based on pairwise influence and structural diversity. The defined influence locality functions have strong predictive power. Without any additional features, we can obtain an F1-score of 71.65\% for predicting users' retweet behaviors by training a logistic regression classifier based on the defined influence locality functions. We incorporate social influence locality into a factor graph model, which can further leverage the network-based correlation. Our experiments on the large microblogging network show that the model significantly improves the precision of retweet prediction. Our analysis also reveals several intriguing discoveries. For example, if you have six friends retweeting a microblog, the average likelihood that you will also retweet it strongly depends on the structure among the six friends: The likelihood will significantly drop (only 1/6) when the six friends do not know each other, compared with the case when the six friends know each other.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "25", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Xie:2015:MMA, author = "Hong Xie and John C. S. 
Lui", title = "Mathematical Modeling and Analysis of Product Rating with Partial Information", journal = j-TKDD, volume = "9", number = "4", pages = "26:1--26:??", month = jun, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700386", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Jun 3 06:21:22 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Many Web services like Amazon, Epinions, and TripAdvisor provide historical product ratings so that users can evaluate the quality of products. Product ratings are important because they affect how well a product will be adopted by the market. The challenge is that we only have partial information on these ratings: each user assigns ratings to only a small subset of products. Under this partial information setting, we explore a number of fundamental questions. What is the minimum number of ratings a product needs so that one can make a reliable evaluation of its quality? How may users' misbehavior, such as cheating in product rating, affect the evaluation result? To answer these questions, we present a probabilistic model to capture various important factors (e.g., rating aggregation rules, rating behavior) that may influence the product quality assessment under the partial information setting. We derive the minimum number of ratings needed to produce a reliable indicator on the quality of a product. We extend our model to accommodate users' misbehavior in product rating. We derive the maximum fraction of misbehaving users that a rating aggregation rule can tolerate and the minimum number of ratings needed to compensate. We carry out experiments using both synthetic and real-world data (from Amazon and TripAdvisor). 
We not only validate our model but also show that the ``average rating rule'' produces more reliable and robust product quality assessments than the ``majority rating rule'' and the ``median rating rule'' in aggregating product ratings. Last, we perform experiments on two movie rating datasets (from Flixster and Netflix) to demonstrate how to apply our framework to improve the applications of recommender systems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "26", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Esuli:2015:OTQ, author = "Andrea Esuli and Fabrizio Sebastiani", title = "Optimizing Text Quantifiers for Multivariate Loss Functions", journal = j-TKDD, volume = "9", number = "4", pages = "27:1--27:??", month = jun, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700406", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Jun 3 06:21:22 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "We address the problem of quantification, a supervised learning task whose goal is, given a class, to estimate the relative frequency (or prevalence) of the class in a dataset of unlabeled items. Quantification has several applications in data and text mining, such as estimating the prevalence of positive reviews in a set of reviews of a given product or estimating the prevalence of a given support issue in a dataset of transcripts of phone calls to tech support. So far, quantification has been addressed by learning a general-purpose classifier, counting the unlabeled items that have been assigned the class, and tuning the obtained counts according to some heuristics. 
In this article, we depart from the tradition of using general-purpose classifiers and use instead a supervised learning model for structured prediction, capable of generating classifiers directly optimized for the (multivariate and nonlinear) function used for evaluating quantification accuracy. The experiments that we have run on 5,500 binary high-dimensional datasets (averaging more than 14,000 documents each) show that this method is more accurate, more stable, and more efficient than existing state-of-the-art quantification methods.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "27", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Lin:2015:IMS, author = "Bing-Rong Lin and Daniel Kifer", title = "Information Measures in Statistical Privacy and Data Processing Applications", journal = j-TKDD, volume = "9", number = "4", pages = "28:1--28:??", month = jun, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700407", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Jun 3 06:21:22 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "In statistical privacy, utility refers to two concepts: information preservation, how much statistical information is retained by a sanitizing algorithm, and usability, how (and with how much difficulty) one extracts this information to build statistical models, answer queries, and so forth. Some scenarios incentivize a separation between information preservation and usability, so that the data owner first chooses a sanitizing algorithm to maximize a measure of information preservation, and, afterward, the data consumers process the sanitized output according to their various individual needs [Ghosh et al. 2009; Williams and McSherry 2010]. 
We analyze the information-preserving properties of utility measures with a combination of two new and three existing utility axioms and study how violations of an axiom can be fixed. We show that the average (over possible outputs of the sanitizer) error of Bayesian decision makers forms the unique class of utility measures that satisfy all of the axioms. The axioms are agnostic to Bayesian concepts such as subjective probabilities and hence strengthen support for Bayesian views in privacy research. In particular, this result connects information preservation to aspects of usability---if the information preservation of a sanitizing algorithm should be measured as the average error of a Bayesian decision maker, shouldn't Bayesian decision theory be a good choice when it comes to using the sanitized outputs for various purposes? We put this idea to the test in the unattributed histogram problem where our decision-theoretic postprocessing algorithm empirically outperforms previously proposed approaches.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "28", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Huang:2015:DAC, author = "Hao Huang and Shinjae Yoo and Dantong Yu and Hong Qin", title = "Density-Aware Clustering Based on Aggregated Heat Kernel and Its Transformation", journal = j-TKDD, volume = "9", number = "4", pages = "29:1--29:??", month = jun, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700385", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Jun 3 06:21:22 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Current spectral clustering algorithms suffer from the sensitivity to existing noise and parameter scaling and may not be aware of different density distributions across clusters.
If these problems are left untreated, the consequent clustering results cannot accurately represent true data patterns, in particular, for complex real-world datasets with heterogeneous densities. This article aims to solve these problems by proposing a diffusion-based Aggregated Heat Kernel (AHK) to improve the clustering stability, and a Local Density Affinity Transformation (LDAT) to correct the bias originating from different cluster densities. AHK statistically models the heat diffusion traces along the entire time scale, so it ensures robustness during the clustering process, while LDAT probabilistically reveals the local density of each instance and suppresses the local density bias in the affinity matrix. Our proposed framework integrates these two techniques systematically. As a result, it not only provides an advanced noise-resisting and density-aware spectral mapping to the original dataset but also demonstrates the stability during the processing of tuning the scaling parameter (which usually controls the range of neighborhood). Furthermore, our framework works well with the majority of similarity kernels, which ensures its applicability to many types of data and problem domains. The systematic experiments on different applications show that our proposed algorithm outperforms state-of-the-art clustering algorithms for the data with heterogeneous density distributions and achieves robust clustering performance with respect to tuning the scaling parameter and handling various levels and types of noise.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "29", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yu:2015:CSF, author = "Kui Yu and Wei Ding and Dan A. 
Simovici and Hao Wang and Jian Pei and Xindong Wu", title = "Classification with Streaming Features: an Emerging-Pattern Mining Approach", journal = j-TKDD, volume = "9", number = "4", pages = "30:1--30:??", month = jun, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700409", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Jun 3 06:21:22 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Many datasets from real-world applications have very high-dimensional or increasing feature space. It is a new research problem to learn and maintain a classifier to deal with very high dimensionality or streaming features. In this article, we adapt the well-known emerging-pattern--based classification models and propose a semi-streaming approach. For streaming features, it is computationally expensive or even prohibitive to mine long-emerging patterns, and it is nontrivial to integrate emerging-pattern mining with feature selection. We present an online feature selection step, which is capable of selecting and maintaining a pool of effective features from a feature stream. Then, in our offline step, separated from the online step, we periodically compute and update emerging patterns from the pool of selected features from the online step. We evaluate the effectiveness and efficiency of the proposed method using a series of benchmark datasets and a real-world case study on Mars crater detection. Our proposed method yields classification performance comparable to the state-of-art static classification methods. Most important, the proposed method is significantly faster and can efficiently handle datasets with streaming features.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "30", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2015:SEH, author = "Guimei Liu and Haojun Zhang and Mengling Feng and Limsoon Wong and See-Kiong Ng", title = "Supporting Exploratory Hypothesis Testing and Analysis", journal = j-TKDD, volume = "9", number = "4", pages = "31:1--31:??", month = jun, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2701430", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Jun 3 06:21:22 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Conventional hypothesis testing is carried out in a hypothesis-driven manner. A scientist must first formulate a hypothesis based on what he or she sees and then devise a variety of experiments to test it. Given the rapid growth of data, it has become virtually impossible for a person to manually inspect all data to find all of the interesting hypotheses for testing. In this article, we propose and develop a data-driven framework for automatic hypothesis testing and analysis. We define a hypothesis as a comparison between two or more subpopulations. We find subpopulations for comparison using frequent pattern mining techniques and then pair them up for statistical hypothesis testing. We also generate additional information for further analysis of the hypotheses that are deemed significant. The number of hypotheses generated can be very large, and many of them are very similar. We develop algorithms to remove redundant hypotheses and present a succinct set of significant hypotheses to users. We conducted a set of experiments to show the efficiency and effectiveness of the proposed algorithms. 
The results show that our system can help users (1) identify significant hypotheses efficiently, (2) isolate the reasons behind significant hypotheses efficiently, and (3) find confounding factors that form Simpson's paradoxes with discovered significant hypotheses.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "31", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Greco:2015:PDU, author = "Gianluigi Greco and Antonella Guzzo and Francesco Lupia and Luigi Pontieri", title = "Process Discovery under Precedence Constraints", journal = j-TKDD, volume = "9", number = "4", pages = "32:1--32:??", month = jun, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2710020", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Jun 3 06:21:22 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Process discovery has emerged as a powerful approach to support the analysis and the design of complex processes. It consists of analyzing a set of traces registering the sequence of tasks performed along several enactments of a transactional system, in order to build a process model that can explain all the episodes recorded over them. An approach to accomplish this task is presented that can benefit from the background knowledge that, in many cases, is available to the analysts taking care of the process (re-)design. The approach is based on encoding the information gathered from the log and the (possibly) given background knowledge in terms of precedence constraints, that is, of constraints over the topology of the resulting process models. 
Mining algorithms are eventually formulated in terms of reasoning problems over precedence constraints, and the computational complexity of such problems is thoroughly analyzed by tracing their tractability frontier. Solution algorithms are proposed and their properties analyzed. These algorithms have been implemented in a prototype system, and results of a thorough experimental activity are discussed.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "32", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Mirbakhsh:2015:ITR, author = "Nima Mirbakhsh and Charles X. Ling", title = "Improving Top-{$N$} Recommendation for Cold-Start Users via Cross-Domain Information", journal = j-TKDD, volume = "9", number = "4", pages = "33:1--33:??", month = jun, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2724720", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Jun 3 06:21:22 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Making accurate recommendations for cold-start users is a challenging yet important problem in recommendation systems. Including more information from other domains is a natural solution to improve the recommendations. However, most previous work in cross-domain recommendations has focused on improving prediction accuracy with several severe limitations. In this article, we extend our previous work on clustering-based matrix factorization in single domains into cross domains. In addition, we utilize recent results on unobserved ratings. Our new method can more effectively utilize data from auxiliary domains to achieve better recommendations, especially for cold-start users. 
For example, our method improves the recall to 21\% on average for cold-start users, whereas previous methods result in only 15\% recall in the cross-domain Amazon dataset. We also observe almost the same improvements in the Epinions dataset. Considering that it is often difficult to make even a small improvement in recommendations, for cold-start users in particular, our result is quite significant.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "33", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Bonchi:2015:CCC, author = "Francesco Bonchi and Aristides Gionis and Francesco Gullo and Charalampos E. Tsourakakis and Antti Ukkonen", title = "Chromatic Correlation Clustering", journal = j-TKDD, volume = "9", number = "4", pages = "34:1--34:??", month = jun, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2728170", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Jun 3 06:21:22 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "We study a novel clustering problem in which the pairwise relations between objects are categorical. This problem can be viewed as clustering the vertices of a graph whose edges are of different types (colors). We introduce an objective function that ensures the edges within each cluster have, as much as possible, the same color. We show that the problem is NP-hard and propose a randomized algorithm with approximation guarantee proportional to the maximum degree of the input graph. The algorithm iteratively picks a random edge as a pivot, builds a cluster around it, and removes the cluster from the graph. Although being fast, easy to implement, and parameter-free, this algorithm tends to produce a relatively large number of clusters. 
To overcome this issue we introduce a variant algorithm, which modifies how the pivot is chosen and how the cluster is built around the pivot. Finally, to address the case where a fixed number of output clusters is required, we devise a third algorithm that directly optimizes the objective function based on the alternating-minimization paradigm. We also extend our objective function to handle cases where object's relations are described by multiple labels. We modify our randomized approximation algorithm to optimize such an extended objective function and show that its approximation guarantee remains proportional to the maximum degree of the graph. We test our algorithms on synthetic and real data from the domains of social media, protein-interaction networks, and bibliometrics. Results reveal that our algorithms outperform a baseline algorithm both in the task of reconstructing a ground-truth clustering and in terms of objective-function value.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "34", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2015:LSC, author = "Hua Wang and Feiping Nie and Heng Huang", title = "Large-Scale Cross-Language {Web} Page Classification via Dual Knowledge Transfer Using Fast Nonnegative Matrix Trifactorization", journal = j-TKDD, volume = "10", number = "1", pages = "1:1--1:??", month = jul, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2710021", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jul 28 17:19:31 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "With the rapid growth of modern technologies, Internet has reached almost every corner of the world. 
As a result, it becomes more and more important to manage and mine information contained in Web pages in different languages. Traditional supervised learning methods usually require a large amount of training data to obtain accurate and robust classification models. However, labeled Web pages did not increase as fast as the growth of Internet. The lack of sufficient training Web pages in many languages, especially for those in uncommonly used languages, makes it a challenge for traditional classification algorithms to achieve satisfactory performance. To address this, we observe that Web pages for a same topic from different languages usually share some common semantic patterns, though in different representation forms. In addition, we also observe that the associations between word clusters and Web page classes are another type of reliable carriers to transfer knowledge across languages. With these recognitions, in this article we propose a novel joint nonnegative matrix trifactorization (NMTF) based Dual Knowledge Transfer (DKT) approach for cross-language Web page classification. Our approach transfers knowledge from the auxiliary language, in which abundant labeled Web pages are available, to the target languages, in which we want to classify Web pages, through two different paths: word cluster approximation and the associations between word clusters and Web page classes. With the reinforcement between these two different knowledge transfer paths, our approach can achieve better classification accuracy. In order to deal with the large-scale real world data, we further develop the proposed DKT approach by constraining the factor matrices of NMTF to be cluster indicator matrices. Due to the nature of cluster indicator matrices, we can decouple the proposed optimization objective and the resulted subproblems are of much smaller sizes involving much less matrix multiplications, which make our new approach much more computationally efficient. 
We evaluate the proposed approach in extensive experiments using a real world cross-language Web page data set. Promising results have demonstrated the effectiveness of our approach that are consistent with our theoretical analyses.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "1", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhou:2015:SIB, author = "Yang Zhou and Ling Liu", title = "Social Influence Based Clustering and Optimization over Heterogeneous Information Networks", journal = j-TKDD, volume = "10", number = "1", pages = "2:1--2:??", month = jul, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2717314", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jul 28 17:19:31 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Social influence analysis has shown great potential for strategic marketing decision. It is well known that people influence one another based on both their social connections and the social activities that they have engaged in the past. In this article, we develop an innovative and high-performance social influence based graph clustering framework with four unique features. First, we explicitly distinguish social connection based influence (self-influence) and social activity based influence (co-influence). We compute the self-influence similarity between two members based on their social connections within a single collaboration network, and compute the co-influence similarity by taking into account not only the set of activities that people participate but also the semantic association between these activities. 
Second, we define the concept of influence-based similarity by introducing a unified influence-based similarity matrix that employs an iterative weight update method to integrate self-influence and co-influence similarities. Third, we design a dynamic learning algorithm, called SI-Cluster, for social influence based graph clustering. It iteratively partitions a large social collaboration network into K clusters based on both the social network itself and the multiple associated activity information networks, each representing a category of activities that people have engaged. To make the SI-Cluster algorithm converge fast, we transform sophisticated nonlinear fractional programming problem with respect to multiple weights into a straightforward nonlinear parametric programming problem of single variable. Finally, we develop an optimization technique of diagonalizable-matrix approximation to speed up the computation of self-influence similarity and co-influence similarities. Our SI-Cluster-Opt significantly improves the efficiency of SI-Cluster on large graphs while maintaining high quality of clustering results. Extensive experimental evaluation on three real-world graphs shows that, compared to existing representative graph clustering algorithms, our SI-Cluster-Opt approach not only achieves a very good balance between self-influence and co-influence similarities but also scales extremely well for clustering large graphs in terms of time complexity while meeting the guarantee of high density, low entropy and low Davies--Bouldin Index.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "2", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Papalexakis:2015:PSP, author = "Evangelos E. Papalexakis and Christos Faloutsos and Nicholas D.
Sidiropoulos", title = "{ParCube}: Sparse Parallelizable {CANDECOMP--PARAFAC} Tensor Decomposition", journal = j-TKDD, volume = "10", number = "1", pages = "3:1--3:??", month = jul, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2729980", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jul 28 17:19:31 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "How can we efficiently decompose a tensor into sparse factors, when the data do not fit in memory? Tensor decompositions have gained a steadily increasing popularity in data-mining applications; however, the current state-of-art decomposition algorithms operate on main memory and do not scale to truly large datasets. In this work, we propose ParCube, a new and highly parallelizable method for speeding up tensor decompositions that is well suited to produce sparse approximations. Experiments with even moderately large data indicate over 90\% sparser outputs and 14 times faster execution, with approximation error close to the current state of the art irrespective of computation and memory requirements. We provide theoretical guarantees for the algorithm's correctness and we experimentally validate our claims through extensive experiments, including four different real world datasets (Enron, Lbnl, Facebook and Nell), demonstrating its effectiveness for data-mining practitioners. In particular, we are the first to analyze the very large Nell dataset using a sparse tensor decomposition, demonstrating that ParCube enables us to handle effectively and efficiently very large datasets. Finally, we make our highly scalable parallel implementation publicly available, enabling reproducibility of our work.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "3", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ahmed:2015:AMC, author = "Rezwan Ahmed and George Karypis", title = "Algorithms for Mining the Coevolving Relational Motifs in Dynamic Networks", journal = j-TKDD, volume = "10", number = "1", pages = "4:1--4:??", month = jul, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2733380", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jul 28 17:19:31 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Computational methods and tools that can efficiently and effectively analyze the temporal changes in dynamic complex relational networks enable us to gain significant insights regarding the entity relations and their evolution. This article introduces a new class of dynamic graph patterns, referred to as coevolving relational motifs (CRMs), which are designed to identify recurring sets of entities whose relations change in a consistent way over time. CRMs can provide evidence to the existence of, possibly unknown, coordination mechanisms by identifying the relational motifs that evolve in a similar and highly conserved fashion. We developed an algorithm to efficiently analyze the frequent relational changes between the entities of the dynamic networks and capture all frequent coevolutions as CRMs. Our algorithm follows a depth-first exploration of the frequent CRM lattice and incorporates canonical labeling for redundancy elimination. Experimental results based on multiple real world dynamic networks show that the method is able to efficiently identify CRMs. In addition, a qualitative analysis of the results shows that the discovered patterns can be used as features to characterize the dynamic network.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "4", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Campello:2015:HDE, author = "Ricardo J. G. B. Campello and Davoud Moulavi and Arthur Zimek and J{\"o}rg Sander", title = "Hierarchical Density Estimates for Data Clustering, Visualization, and Outlier Detection", journal = j-TKDD, volume = "10", number = "1", pages = "5:1--5:??", month = jul, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2733381", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jul 28 17:19:31 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "An integrated framework for density-based cluster analysis, outlier detection, and data visualization is introduced in this article. The main module consists of an algorithm to compute hierarchical estimates of the level sets of a density, following Hartigan's classic model of density-contour clusters and trees. Such an algorithm generalizes and improves existing density-based clustering techniques with respect to different aspects. It provides as a result a complete clustering hierarchy composed of all possible density-based clusters following the nonparametric model adopted, for an infinite range of density thresholds. The resulting hierarchy can be easily processed so as to provide multiple ways for data visualization and exploration. It can also be further postprocessed so that: (i) a normalized score of ``outlierness'' can be assigned to each data object, which unifies both the global and local perspectives of outliers into a single definition; and (ii) a ``flat'' (i.e., nonhierarchical) clustering solution composed of clusters extracted from local cuts through the cluster tree (possibly corresponding to different density thresholds) can be obtained, either in an unsupervised or in a semisupervised way. 
In the unsupervised scenario, the algorithm corresponding to this postprocessing module provides a global, optimal solution to the formal problem of maximizing the overall stability of the extracted clusters. If partially labeled objects or instance-level constraints are provided by the user, the algorithm can solve the problem by considering both constraints violations/satisfactions and cluster stability criteria. An asymptotic complexity analysis, both in terms of running time and memory space, is described. Experiments are reported that involve a variety of synthetic and real datasets, including comparisons with state-of-the-art, density-based clustering and (global and local) outlier detection methods.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "5", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Berardi:2015:UTR, author = "Giacomo Berardi and Andrea Esuli and Fabrizio Sebastiani", title = "Utility-Theoretic Ranking for Semiautomated Text Classification", journal = j-TKDD, volume = "10", number = "1", pages = "6:1--6:??", month = jul, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2742548", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jul 28 17:19:31 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Semiautomated Text Classification (SATC) may be defined as the task of ranking a set D of automatically labelled textual documents in such a way that, if a human annotator validates (i.e., inspects and corrects where appropriate) the documents in a top-ranked portion of D with the goal of increasing the overall labelling accuracy of D, the expected increase is maximized. An obvious SATC strategy is to rank D so that the documents that the classifier has labelled with the lowest confidence are top ranked. 
In this work, we show that this strategy is suboptimal. We develop new utility-theoretic ranking methods based on the notion of validation gain, defined as the improvement in classification effectiveness that would derive by validating a given automatically labelled document. We also propose a new effectiveness measure for SATC-oriented ranking methods, based on the expected reduction in classification error brought about by partially validating a list generated by a given ranking method. We report the results of experiments showing that, with respect to the baseline method mentioned earlier, and according to the proposed measure, our utility-theoretic ranking methods can achieve substantially higher expected reductions in classification error.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "6", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yu:2015:DIP, author = "Zhiwen Yu and Zhu Wang and Huilei He and Jilei Tian and Xinjiang Lu and Bin Guo", title = "Discovering Information Propagation Patterns in Microblogging Services", journal = j-TKDD, volume = "10", number = "1", pages = "7:1--7:??", month = jul, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2742801", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jul 28 17:19:31 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "During the last decade, microblog has become an important social networking service with billions of users all over the world, acting as a novel and efficient platform for the creation and dissemination of real-time information. 
Modeling and revealing the information propagation patterns in microblogging services can not only lead to more accurate understanding of user behaviors and provide insights into the underlying sociology, but also enable useful applications such as trending prediction, recommendation and filtering, spam detection and viral marketing. In this article, we aim to reveal the information propagation patterns in Sina Weibo, the biggest microblogging service in China. First, the cascade of each message is represented as a tree based on its retweeting process. Afterwards, we divide the information propagation pattern into two levels, that is, the macro level and the micro level. On one hand, the macro propagation patterns refer to general propagation modes that are extracted by grouping propagation trees based on hierarchical clustering. On the other hand, the micro propagation patterns are frequent information flow patterns that are discovered using tree-based mining techniques. Experimental results show that several interesting patterns are extracted, such as popular message propagation, artificial propagation, and typical information flows between different types of users.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov.
Data", articleno = "7", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2015:SMB, author = "Xianchao Zhang and Xiaotong Zhang and Han Liu", title = "Smart Multitask {Bregman} Clustering and Multitask Kernel Clustering", journal = j-TKDD, volume = "10", number = "1", pages = "8:1--8:??", month = jul, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2747879", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jul 28 17:19:31 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Traditional clustering algorithms deal with a single clustering task on a single dataset. However, there are many related tasks in the real world, which motivates multitask clustering. Recently some multitask clustering algorithms have been proposed, and among them multitask Bregman clustering (MBC) is a very applicable method. MBC alternatively updates clusters and learns relationships between clusters of different tasks, and the two phases boost each other. However, the boosting does not always have positive effects on improving the clustering performance, it may also cause negative effects. Another issue of MBC is that it cannot deal with nonlinear separable data. In this article, we show that in MBC, the process of using cluster relationship to boost the cluster updating phase may cause negative effects, that is, cluster centroids may be skewed under some conditions. We propose a smart multitask Bregman clustering (S-MBC) algorithm which can identify the negative effects of the boosting and avoid the negative effects if they occur. We then propose a multitask kernel clustering (MKC) framework for nonlinear separable data by using a similar framework like MBC in the kernel space. 
We also propose a specific optimization method, which is quite different from that of MBC, to implement the MKC framework. Since MKC can also cause negative effects like MBC, we further extend the framework of MKC to a smart multitask kernel clustering (S-MKC) framework in a similar way that S-MBC is extended from MBC. We conduct experiments on 10 real world multitask clustering datasets to evaluate the performance of S-MBC and S-MKC. The results on clustering accuracy show that: (1) compared with the original MBC algorithm MBC, S-MBC and S-MKC perform much better; (2) compared with the convex discriminative multitask relationship clustering (DMTRC) algorithms DMTRC-L and DMTRC-R which also avoid negative transfer, S-MBC and S-MKC perform worse in the (ideal) case in which different tasks have the same cluster number and the empirical label marginal distribution in each task distributes evenly, but better or comparable in other (more general) cases. Moreover, S-MBC and S-MKC can work on the datasets in which different tasks have different number of clusters, violating the assumptions of DMTRC-L and DMTRC-R. The results on efficiency show that S-MBC and S-MKC consume more computational time than MBC and less computational time than DMTRC-L and DMTRC-R. Overall S-MBC and S-MKC are competitive compared with the state-of-the-art multitask clustering algorithms in synthetical terms of accuracy, efficiency and applicability.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "8", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wei:2015:MTP, author = "Wei Wei and Kathleen M. 
Carley", title = "Measuring Temporal Patterns in Dynamic Social Networks", journal = j-TKDD, volume = "10", number = "1", pages = "9:1--9:??", month = jul, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2749465", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jul 28 17:19:31 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Given social networks over time, how can we measure network activities across different timesteps with a limited number of metrics? We propose two classes of dynamic metrics for assessing temporal evolution patterns of agents in terms of persistency and emergence. For each class of dynamic metrics, we implement it using three different temporal aggregation models ranging from the most commonly used Average Aggregation Model to the more complex models such as the Exponential Aggregation Model. We argue that the problem of measuring temporal patterns can be formulated using Recency and Primacy effect, which is a concept used to characterize human cognitive processes. Experimental results show that the way metrics model Recency--Primacy effect is closely related to their abilities to measure temporal patterns. Furthermore, our results indicate that future network agent activities can be predicted based on history information using dynamic metrics. By conducting multiple experiments, we are also able to find an optimal length of history information that is most relevant to future activities. This optimal length is highly consistent within a dataset and can be used as an intrinsic metric to evaluate a dynamic social network.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov.
Data", articleno = "9", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2015:RAT, author = "Siyuan Liu and Qiang Qu and Shuhui Wang", title = "Rationality Analytics from Trajectories", journal = j-TKDD, volume = "10", number = "1", pages = "10:1--10:??", month = jul, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2735634", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jul 28 17:19:31 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "The availability of trajectories tracking the geographical locations of people as a function of time offers an opportunity to study human behaviors. In this article, we study rationality from the perspective of user decision on visiting a point of interest (POI) which is represented as a trajectory. However, the analysis of rationality is challenged by a number of issues, for example, how to model a trajectory in terms of complex user decision processes? and how to detect hidden factors that have significant impact on the rational decision making? In this study, we propose Rationality Analysis Model (RAM) to analyze rationality from trajectories in terms of a set of impact factors. In order to automatically identify hidden factors, we propose a method, Collective Hidden Factor Retrieval (CHFR), which can also be generalized to parse multiple trajectories at the same time or parse individual trajectories of different time periods. Extensive experimental study is conducted on three large-scale real-life datasets (i.e., taxi trajectories, user shopping trajectories, and visiting trajectories in a theme park). The results show that the proposed methods are efficient, effective, and scalable. We also deploy a system in a large theme park to conduct a field study. 
Interesting findings and user feedback of the field study are provided to support other applications in user behavior mining and analysis, such as business intelligence and user management for marketing purposes.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "10", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Jia:2015:SGR, author = "Adele Lu Jia and Siqi Shen and Ruud {Van De Bovenkamp} and Alexandru Iosup and Fernando Kuipers and Dick H. J. Epema", title = "Socializing by Gaming: Revealing Social Relationships in Multiplayer Online Games", journal = j-TKDD, volume = "10", number = "2", pages = "11:1--11:??", month = oct, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2736698", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Oct 26 17:19:18 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Multiplayer Online Games (MOGs) like Defense of the Ancients and StarCraft II have attracted hundreds of millions of users who communicate, interact, and socialize with each other through gaming. In MOGs, rich social relationships emerge and can be used to improve gaming services such as match recommendation and game population retention, which are important for the user experience and the commercial value of the companies who run these MOGs. In this work, we focus on understanding social relationships in MOGs. We propose a graph model that is able to capture social relationships of a variety of types and strengths. We apply our model to real-world data collected from three MOGs that contain in total over ten years of behavioral history for millions of players and matches. We compare social relationships in MOGs across different game genres and with regular online social networks like Facebook. 
Taking match recommendation as an example application of our model, we propose SAMRA, a Socially Aware Match Recommendation Algorithm that takes social relationships into account. We show that our model not only improves the precision of traditional link prediction approaches, but also potentially helps players enjoy games to a higher extent.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "11", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Papagelis:2015:RSG, author = "Manos Papagelis", title = "Refining Social Graph Connectivity via Shortcut Edge Addition", journal = j-TKDD, volume = "10", number = "2", pages = "12:1--12:??", month = oct, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2757281", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Oct 26 17:19:18 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Small changes on the structure of a graph can have a dramatic effect on its connectivity. While in the traditional graph theory, the focus is on well-defined properties of graph connectivity, such as biconnectivity, in the context of a social graph, connectivity is typically manifested by its ability to carry on social processes. In this paper, we consider the problem of adding a small set of nonexisting edges (shortcuts) in a social graph with the main objective of minimizing its characteristic path length. This property determines the average distance between pairs of vertices and essentially controls how broadly information can propagate through a network. We formally define the problem of interest, characterize its hardness and propose a novel method, path screening, which quickly identifies important shortcuts to guide the augmentation of the graph. 
We devise a sampling-based variant of our method that can scale up the computation in larger graphs. The claims of our methods are formally validated. Through experiments on real and synthetic data, we demonstrate that our methods are a multitude of times faster than standard approaches, their accuracy outperforms sensible baselines and they can ease the spread of information in a network, for a varying range of conditions.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "12", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Hong:2015:CAR, author = "Liang Hong and Lei Zou and Cheng Zeng and Luming Zhang and Jian Wang and Jilei Tian", title = "Context-Aware Recommendation Using Role-Based Trust Network", journal = j-TKDD, volume = "10", number = "2", pages = "13:1--13:??", month = oct, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2751562", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Oct 26 17:19:18 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Recommender systems have been studied comprehensively in both academic and industrial fields over the past decade. As user interests can be affected by context at any time and any place in mobile scenarios, rich context information becomes more and more important for personalized context-aware recommendations. Although existing context-aware recommender systems can make context-aware recommendations to some extent, they suffer several inherent weaknesses: (1) Users' context-aware interests are not modeled realistically, which reduces the recommendation quality; (2) Current context-aware recommender systems ignore trust relations among users. 
Trust relations are actually context-aware and associated with certain aspects (i.e., categories of items) in mobile scenarios. In this article, we define a term role to model common context-aware interests among a group of users. We propose an efficient role mining algorithm to mine roles from a ``user-context-behavior'' matrix, and a role-based trust model to calculate context-aware trust value between two users. During online recommendation, given a user u in a context c, an efficient weighted set similarity query (WSSQ) algorithm is designed to build u's role-based trust network in context c. Finally, we make recommendations to u based on u's role-based trust network by considering both context-aware roles and trust relations. Extensive experiments demonstrate that our recommendation approach outperforms the state-of-the-art methods in both effectiveness and efficiency.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "13", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2015:OBF, author = "Lei Zhang and Ping Luo and Linpeng Tang and Enhong Chen and Qi Liu and Min Wang and Hui Xiong", title = "Occupancy-Based Frequent Pattern Mining", journal = j-TKDD, volume = "10", number = "2", pages = "14:1--14:??", month = oct, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2753765", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Oct 26 17:19:18 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Frequent pattern mining is an important data mining problem with many broad applications. Most studies in this field use support (frequency) to measure the popularity of a pattern, namely the fraction of transactions or sequences that include the pattern in a data set.
In this study, we introduce a new interesting measure, namely occupancy, to measure the completeness of a pattern in its supporting transactions or sequences. This is motivated by some real-world pattern recommendation applications in which an interesting pattern should not only be frequent, but also occupies a large portion of its supporting transactions or sequences. With the definition of occupancy we call a pattern dominant if its occupancy value is above a user-specified threshold. Then, our task is to identify the qualified patterns which are both dominant and frequent. Also, we formulate the problem of mining top-k qualified patterns, that is, finding k qualified patterns with maximum values on a user-defined function of support and occupancy, for example, weighted sum of support and occupancy. The challenge to these tasks is that the value of occupancy does not change monotonically when more items are appended to a given pattern. Therefore, we propose a general algorithm called DOFRA (DOminant and FRequent pattern mining Algorithm) for mining these qualified patterns, which explores the upper bound properties on occupancy to drastically reduce the search process. Finally, we show the effectiveness of DOFRA in two real-world applications and also demonstrate the efficiency of DOFRA on several real and large synthetic datasets.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "14", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chen:2015:AAS, author = "Hung-Hsuan Chen and C. 
Lee Giles", title = "{ASCOS++}: an Asymmetric Similarity Measure for Weighted Networks to Address the Problem of {SimRank}", journal = j-TKDD, volume = "10", number = "2", pages = "15:1--15:??", month = oct, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2776894", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Oct 26 17:19:18 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/pagerank.bib; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "In this article, we explore the relationships among digital objects in terms of their similarity based on vertex similarity measures. We argue that SimRank --- a famous similarity measure --- and its families, such as P-Rank and SimRank++, fail to capture similar node pairs in certain conditions, especially when two nodes can only reach each other through paths of odd lengths. We present new similarity measures ASCOS and ASCOS++ to address the problem. ASCOS outputs a more complete similarity score than SimRank and SimRank's families. ASCOS++ enriches ASCOS to include edge weight into the measure, giving all edges and network weights an opportunity to make their contribution. We show that both ASCOS++ and ASCOS can be reformulated and applied on a distributed environment for parallel contribution. Experimental results show that ASCOS++ reports a better score than SimRank and several famous similarity measures. Finally, we re-examine previous use cases of SimRank, and explain appropriate and inappropriate use cases. We suggest future SimRank users following the rules proposed here before na{\"\i}vely applying it. We also discuss the relationship between ASCOS++ and PageRank.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "15", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zafarani:2015:UIA, author = "Reza Zafarani and Lei Tang and Huan Liu", title = "User Identification Across Social Media", journal = j-TKDD, volume = "10", number = "2", pages = "16:1--16:??", month = oct, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2747880", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Oct 26 17:19:18 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "People use various social media sites for different purposes. The information on each site is often partial. When sources of complementary information are integrated, a better profile of a user can be built. This profile can help improve online services such as advertising across sites. To integrate these sources of information, it is necessary to identify individuals across social media sites. This paper aims to address the cross-media user identification problem. We provide evidence on the existence of a mapping among identities of individuals across social media sites, study the feasibility of finding this mapping, and illustrate and develop means for finding this mapping. Our studies show that effective approaches that exploit information redundancies due to users' unique behavioral patterns can be utilized to find such a mapping. This study paves the way for analysis and mining across social networking sites, and facilitates the creation of novel online services across sites. In particular, recommending friends and advertising across networks, analyzing information diffusion across sites, and studying specific user behavior such as user migration across sites in social media are one of the many areas that can benefit from the results of this study.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Knowl. Discov. Data", articleno = "16", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2015:RUC, author = "Lei Li and Wei Peng and Saurabh Kataria and Tong Sun and Tao Li", title = "Recommending Users and Communities in Social Media", journal = j-TKDD, volume = "10", number = "2", pages = "17:1--17:??", month = oct, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2757282", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Oct 26 17:19:18 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Social media has become increasingly prevalent in the last few years, not only enabling people to connect with each other by social links, but also providing platforms for people to share information and interact over diverse topics. Rich user-generated information, for example, users' relationships and daily posts, are often available in most social media service websites. Given such information, a challenging problem is to provide reasonable user and community recommendation for a target user, and consequently, help the target user engage in the daily discussions and activities with his/her friends or like-minded people. In this article, we propose a unified framework of recommending users and communities that utilizes the information in social media. Given a user's profile or a set of keywords as input, our framework is capable of recommending influential users and topic-cohesive interactive communities that are most relevant to the given user or keywords. With the proposed framework, users can find other individuals or communities sharing similar interests, and then have more interaction with these users or within the communities. 
We present a generative topic model to discover user-oriented and community-oriented topics simultaneously, which enables us to capture the exact topical interests of users, as well as the focuses of communities. Extensive experimental evaluation and case studies on a dataset collected from Twitter demonstrate the effectiveness of our proposed framework compared with other probabilistic-topic-model-based recommendation methods.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "17", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yu:2015:GGA, author = "Rose Yu and Xinran He and Yan Liu", title = "{GLAD}: Group Anomaly Detection in Social Media Analysis", journal = j-TKDD, volume = "10", number = "2", pages = "18:1--18:??", month = oct, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2811268", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Oct 26 17:19:18 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Traditional anomaly detection on social media mostly focuses on individual point anomalies while anomalous phenomena usually occur in groups. Therefore, it is valuable to study the collective behavior of individuals and detect group anomalies. Existing group anomaly detection approaches rely on the assumption that the groups are known, which can hardly be true in real world social media applications. In this article, we take a generative approach by proposing a hierarchical Bayes model: Group Latent Anomaly Detection (GLAD) model. GLAD takes both pairwise and point-wise data as input, automatically infers the groups and detects group anomalies simultaneously. To account for the dynamic properties of the social media data, we further generalize GLAD to its dynamic extension d-GLAD. 
We conduct extensive experiments to evaluate our models on both synthetic and real world datasets. The empirical results demonstrate that our approach is effective and robust in discovering latent groups and detecting group anomalies.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "18", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chakrabarti:2015:BPL, author = "Aniket Chakrabarti and Venu Satuluri and Atreya Srivathsan and Srinivasan Parthasarathy", title = "A {Bayesian} Perspective on Locality Sensitive Hashing with Extensions for Kernel Methods", journal = j-TKDD, volume = "10", number = "2", pages = "19:1--19:??", month = oct, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2778990", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Oct 26 17:19:18 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/hash.bib; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Given a collection of objects and an associated similarity measure, the all-pairs similarity search problem asks us to find all pairs of objects with similarity greater than a certain user-specified threshold. In order to reduce the number of candidates to search, locality-sensitive hashing (LSH) based indexing methods are very effective. However, most such methods only use LSH for the first phase of similarity search --- that is, efficient indexing for candidate generation. In this article, we present BayesLSH, a principled Bayesian algorithm for the subsequent phase of similarity search --- performing candidate pruning and similarity estimation using LSH. A simpler variant, BayesLSH-Lite, which calculates similarities exactly, is also presented. 
Our algorithms are able to quickly prune away a large majority of the false positive candidate pairs, leading to significant speedups over baseline approaches. For BayesLSH, we also provide probabilistic guarantees on the quality of the output, both in terms of accuracy and recall. Finally, the quality of BayesLSH's output can be easily tuned and does not require any manual setting of the number of hashes to use for similarity estimation, unlike standard approaches. For two state-of-the-art candidate generation algorithms, AllPairs and LSH, BayesLSH enables significant speedups, typically in the range 2 $ \times $ --20 $ \times $ for a wide variety of datasets. We also extend the BayesLSH algorithm for kernel methods --- in which the similarity between two data objects is defined by a kernel function. Since the embedding of data points in the transformed kernel space is unknown, algorithms such as AllPairs which rely on building inverted index structure for fast similarity search do not work with kernel functions. Exhaustive search across all possible pairs is also not an option since the dataset can be huge and computing the kernel values for each pair can be prohibitive. We propose K-BayesLSH an all-pairs similarity search problem for kernel functions. K-BayesLSH leverages a recently proposed idea --- kernelized locality sensitive hashing (KLSH) --- for hash bit computation and candidate generation, and uses the aforementioned BayesLSH idea for candidate pruning and similarity estimation. We ran a broad spectrum of experiments on a variety of datasets drawn from different domains and with distinct kernels and find a speedup of 2 $ \times $ --7 $ \times $ over vanilla KLSH.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "19", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2015:DAV, author = "Yao Zhang and B. 
Aditya Prakash", title = "Data-Aware Vaccine Allocation Over Large Networks", journal = j-TKDD, volume = "10", number = "2", pages = "20:1--20:??", month = oct, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2803176", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Oct 26 17:19:18 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Given a graph, like a social/computer network or the blogosphere, in which an infection (or meme or virus) has been spreading for some time, how to select the k best nodes for immunization/quarantining immediately? Most previous works for controlling propagation (say via immunization) have concentrated on developing strategies for vaccination preemptively before the start of the epidemic. While very useful to provide insights in to which baseline policies can best control an infection, they may not be ideal to make real-time decisions as the infection is progressing. In this paper, we study how to immunize healthy nodes, in the presence of already infected nodes. Efficient algorithms for such a problem can help public-health experts make more informed choices, tailoring their decisions to the actual distribution of the epidemic on the ground. First we formulate the Data-Aware Vaccination problem, and prove it is NP-hard and also that it is hard to approximate. Secondly, we propose three effective polynomial-time heuristics DAVA, DAVA-prune and DAVA-fast, of varying degrees of efficiency and performance. Finally, we also demonstrate the scalability and effectiveness of our algorithms through extensive experiments on multiple real networks including large epidemiology datasets (containing millions of interactions). 
Our algorithms show substantial gains of up to ten times more healthy nodes at the end against many other intuitive and nontrivial competitors.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "20", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Rowe:2016:MUD, author = "Matthew Rowe", title = "Mining User Development Signals for Online Community Churner Detection", journal = j-TKDD, volume = "10", number = "3", pages = "21:1--21:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2798730", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Feb 25 05:56:34 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Churners are users who stop using a given service after previously signing up. In the domain of telecommunications and video games, churners represent a loss of revenue as a user leaving indicates that they will no longer pay for the service. In the context of online community platforms (e.g., community message boards, social networking sites, question--answering systems, etc.), the churning of a user can represent different kinds of loss: of social capital, of expertise, or of a vibrant individual who is a mediator for interaction and communication. Detecting which users are likely to churn from online communities, therefore, enables community managers to offer incentives to entice those users back; as retention is less expensive than re-signing users up. In this article, we tackle the task of detecting churners on four online community platforms by mining user development signals. These signals explain how users have evolved along different dimensions (i.e., social and lexical) relative to their prior behaviour and the community in which they have interacted. 
We present a linear model, based upon elastic-net regularisation, that uses extracted features from the signals to detect churners. Our evaluation of this model against several state of the art baselines, including our own prior work, empirically demonstrates the superior performance that this approach achieves for several experimental settings. This article presents a novel approach to churn prediction that takes a different route from existing approaches that are based on measuring static social network properties of users (e.g., centrality, in-degree, etc.).", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "21", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Prat-Perez:2016:PTT, author = "Arnau Prat-P{\'e}rez and David Dominguez-Sal and Josep-M. Brunat and Josep-Lluis Larriba-Pey", title = "Put Three and Three Together: Triangle-Driven Community Detection", journal = j-TKDD, volume = "10", number = "3", pages = "22:1--22:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2775108", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Feb 25 05:56:34 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Community detection has arisen as one of the most relevant topics in the field of graph data mining due to its applications in many fields such as biology, social networks, or network traffic analysis. Although the existing metrics used to quantify the quality of a community work well in general, under some circumstances, they fail at correctly capturing such notion. The main reason is that these metrics consider the internal community edges as a set, but ignore how these actually connect the vertices of the community. 
We propose the Weighted Community Clustering (WCC), which is a new community metric that takes the triangle instead of the edge as the minimal structural motif indicating the presence of a strong relation in a graph. We theoretically analyse WCC in depth and formally prove, by means of a set of properties, that the maximization of WCC guarantees communities with cohesion and structure. In addition, we propose Scalable Community Detection (SCD), a community detection algorithm based on WCC, which is designed to be fast and scalable on SMP machines, showing experimentally that WCC correctly captures the concept of community in social networks using real datasets. Finally, using ground-truth data, we show that SCD provides better quality than the best disjoint community detection algorithms of the state of the art while performing faster.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "22", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Guo:2016:MDM, author = "Zhen Guo and Zhongfei (Mark) Zhang and Eric P. Xing and Christos Faloutsos", title = "Multimodal Data Mining in a Multimedia Database Based on Structured Max Margin Learning", journal = j-TKDD, volume = "10", number = "3", pages = "23:1--23:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2742549", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Feb 25 05:56:34 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Mining knowledge from a multimedia database has received increasing attentions recently since huge repositories are made available by the development of the Internet. 
In this article, we exploit the relations among different modalities in a multimedia database and present a framework for general multimodal data mining problem where image annotation and image retrieval are considered as the special cases. Specifically, the multimodal data mining problem can be formulated as a structured prediction problem where we learn the mapping from an input to the structured and interdependent output variables. In addition, in order to reduce the demanding computation, we propose a new max margin structure learning approach called Enhanced Max Margin Learning (EMML) framework, which is much more efficient with a much faster convergence rate than the existing max margin learning methods, as verified through empirical evaluations. Furthermore, we apply EMML framework to develop an effective and efficient solution to the multimodal data mining problem that is highly scalable in the sense that the query response time is independent of the database scale. The EMML framework allows an efficient multimodal data mining query in a very large scale multimedia database, and excels many existing multimodal data mining methods in the literature that do not scale up at all. The performance comparison with a state-of-the-art multimodal data mining method is reported for the real-world image databases.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "23", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Myers:2016:DAK, author = "Risa B. Myers and John C. Frenzel and Joseph R. Ruiz and Christopher M. Jermaine", title = "Do Anesthesiologists Know What They Are Doing? 
{Mining} a Surgical Time-Series Database to Correlate Expert Assessment with Outcomes", journal = j-TKDD, volume = "10", number = "3", pages = "24:1--24:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2822897", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Feb 25 05:56:34 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Anesthesiologists are taught to carefully manage patient vital signs during surgery. Unfortunately, there is little empirical evidence that vital sign management, as currently practiced, is correlated with patient outcomes. We seek to validate or repudiate current clinical practice and determine whether or not clinician evaluation of surgical vital signs correlate with outcomes. Using a database of over 90,000 cases, we attempt to determine whether those cases that anesthesiologists would subjectively decide are ``low quality'' are more likely to result in negative outcomes. The problem reduces to one of multi-dimensional time-series classification. Our approach is to have a set of expert anesthesiologists independently label a small number of training cases, from which we build classifiers and label all 90,000 cases. We then use the labeling to search for correlation with outcomes and compare the prevalence of important 30-day outcomes between providers. To mimic the providers' quality labels, we consider several standard classification methods, such as dynamic time warping in conjunction with a kNN classifier, as well as complexity invariant distance, and a regression based upon the feature extraction methods outlined by Mao et al. 2012 (using features such as time-series mean, standard deviation, skew, etc.). 
We also propose a new feature selection mechanism that learns a hidden Markov model to segment the time series; the fraction of time that each series spends in each state is used to label the series using a regression-based classifier. In the end, we obtain strong, empirical evidence that current best practice is correlated with reduced negative patient outcomes. We also learn that all of the experts were able to significantly separate cases by outcome, with higher prevalence of negative 30-day outcomes in the cases labeled as ``low quality'' for almost all of the outcomes investigated.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "24", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Namata:2016:CGI, author = "Galileo Mark Namata and Ben London and Lise Getoor", title = "Collective Graph Identification", journal = j-TKDD, volume = "10", number = "3", pages = "25:1--25:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2818378", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Feb 25 05:56:34 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Data describing networks---such as communication networks, transaction networks, disease transmission networks, collaboration networks, etc.---are becoming increasingly available. While observational data can be useful, it often only hints at the actual underlying process that governs interactions and attributes. For example, an email communication network provides insight into its users and their relationships, but is not the same as the ``real'' underlying social network. In this article, we introduce the problem of graph identification, i.e., discovering the latent graph structure underlying an observed network. 
We cast the problem as a probabilistic inference task, in which we must infer the nodes, edges, and node labels of a hidden graph, based on evidence. This entails solving several canonical problems in network analysis: entity resolution (determining when two observations correspond to the same entity), link prediction (inferring the existence of links), and node labeling (inferring hidden attributes). While each of these subproblems has been well studied in isolation, here we consider them as a single, collective task. We present a simple, yet novel, approach to address all three subproblems simultaneously. Our approach, which we refer to as C$^3$, consists of a collection of Coupled Collective Classifiers that are applied iteratively to propagate inferred information among the subproblems. We consider variants of C$^3$ using different learning and inference techniques and empirically demonstrate that C$^3$ is superior, both in terms of predictive accuracy and running time, to state-of-the-art probabilistic approaches on four real problems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "25", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Subbian:2016:MIU, author = "Karthik Subbian and Charu Aggarwal and Jaideep Srivastava", title = "Mining Influencers Using Information Flows in Social Streams", journal = j-TKDD, volume = "10", number = "3", pages = "26:1--26:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2815625", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Feb 25 05:56:34 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "The problem of discovering information flow trends in social networks has become increasingly relevant due to the increasing amount of content in online social networks, and its relevance as a tool for research into the content trends analysis in the network. An important part of this analysis is to determine the key patterns of flow in the underlying network. Almost all the work in this area has focused on fixed models of the network structure, and edge-based transmission between nodes. In this article, we propose a fully content-centered model of flow analysis in networks, in which the analysis is based on actual content transmissions in the underlying social stream, rather than a static model of transmission on the edges. First, we introduce the problem of influence analysis in the context of information flow in networks. We then propose a novel algorithm InFlowMine to discover the information flow patterns in the network and demonstrate the effectiveness of the discovered information flows using an influence mining application. This application illustrates the flexibility and effectiveness of our information flow model to find topic- or network-specific influencers, or their combinations. 
We empirically show that our information flow mining approach is effective and efficient than the existing methods on a number of different measures.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "26", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Angiulli:2016:TGU, author = "Fabrizio Angiulli and Fabio Fassetti", title = "Toward Generalizing the Unification with Statistical Outliers: The Gradient Outlier Factor Measure", journal = j-TKDD, volume = "10", number = "3", pages = "27:1--27:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2829956", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Feb 25 05:56:34 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "In this work, we introduce a novel definition of outlier, namely the Gradient Outlier Factor (or GOF), with the aim to provide a definition that unifies with the statistical one on some standard distributions but has a different behavior in the presence of mixture distributions. Intuitively, the GOF score measures the probability to stay in the neighborhood of a certain object. It is directly proportional to the density and inversely proportional to the variation of the density. We derive formal properties under which the GOF definition unifies the statistical outlier definition and show that the unification holds for some standard distributions, while the GOF is able to capture tails in the presence of different distributions even if their densities sensibly differ. Moreover, we provide a probabilistic interpretation of the GOF score, by means of the notion of density of the data density. Experimental results confirm that there are scenarios in which the novel definition can be profitably employed. 
To the best of our knowledge, except for distance-based outlier, no other data mining outlier definition has a so clearly established relationship with statistical outliers.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "27", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Koutra:2016:DPM, author = "Danai Koutra and Neil Shah and Joshua T. Vogelstein and Brian Gallagher and Christos Faloutsos", title = "{DeltaCon}: Principled Massive-Graph Similarity Function with Attribution", journal = j-TKDD, volume = "10", number = "3", pages = "28:1--28:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2824443", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Feb 25 05:56:34 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "How much has a network changed since yesterday? How different is the wiring of Bob's brain (a left-handed male) and Alice's brain (a right-handed female), and how is it different? Graph similarity with given node correspondence, i.e., the detection of changes in the connectivity of graphs, arises in numerous settings. In this work, we formally state the axioms and desired properties of the graph similarity functions, and evaluate when state-of-the-art methods fail to detect crucial connectivity changes in graphs. We propose DeltaCon, a principled, intuitive, and scalable algorithm that assesses the similarity between two graphs on the same nodes (e.g., employees of a company, customers of a mobile carrier). In conjunction, we propose DeltaCon-Attr, a related approach that enables attribution of change or dissimilarity to responsible nodes and edges. Experiments on various synthetic and real graphs showcase the advantages of our method over existing similarity measures.
Finally, we employ DeltaCon and DeltaCon-Attr on real applications: (a) we classify people to groups of high and low creativity based on their brain connectivity graphs, (b) do temporal anomaly detection in the who-emails-whom Enron graph and find the top culprits for the changes in the temporal corporate email graph, and (c) recover pairs of test-retest large brain scans ($ \sim $17M edges, up to 90M edges) for 21 subjects.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "28", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhao:2016:MPA, author = "Wayne Xin Zhao and Jinpeng Wang and Yulan He and Ji-Rong Wen and Edward Y. Chang and Xiaoming Li", title = "Mining Product Adopter Information from Online Reviews for Improving Product Recommendation", journal = j-TKDD, volume = "10", number = "3", pages = "29:1--29:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2842629", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Feb 25 05:56:34 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "We present in this article an automated framework that extracts product adopter information from online reviews and incorporates the extracted information into feature-based matrix factorization for more effective product recommendation. In specific, we propose a bootstrapping approach for the extraction of product adopters from review text and categorize them into a number of different demographic categories. The aggregated demographic information of many product adopters can be used to characterize both products and users in the form of distributions over different demographic categories.
We further propose a graph-based method to iteratively update user- and product-related distributions more reliably in a heterogeneous user--product graph and incorporate them as features into the matrix factorization approach for product recommendation. Our experimental results on a large dataset crawled from JingDong, the largest B2C e-commerce website in China, show that our proposed framework outperforms a number of competitive baselines for product recommendation.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "29", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Duarte:2016:AMR, author = "Jo{\~a}o Duarte and Jo{\~a}o Gama and Albert Bifet", title = "Adaptive Model Rules From High-Speed Data Streams", journal = j-TKDD, volume = "10", number = "3", pages = "30:1--30:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2829955", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Feb 25 05:56:34 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Decision rules are one of the most expressive and interpretable models for machine learning. In this article, we present Adaptive Model Rules (AMRules), the first stream rule learning algorithm for regression problems. In AMRules, the antecedent of a rule is a conjunction of conditions on the attribute values, and the consequent is a linear combination of the attributes. In order to maintain a regression model compatible with the most recent state of the process generating data, each rule uses a Page-Hinkley test to detect changes in this process and react to changes by pruning the rule set. Online learning might be strongly affected by outliers. AMRules is also equipped with outliers detection mechanisms to avoid model adaption using anomalous examples.
In the experimental section, we report the results of AMRules on benchmark regression problems, and compare the performance of our system with other streaming regression algorithms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "30", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Lu:2016:SCB, author = "Faming Lu and Qingtian Zeng and Hua Duan", title = "Synchronization-Core-Based Discovery of Processes with Decomposable Cyclic Dependencies", journal = j-TKDD, volume = "10", number = "3", pages = "31:1--31:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2845086", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Feb 25 05:56:34 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Traditional process discovery techniques mine process models based upon event traces giving little consideration to workflow relevant data recorded in event logs. The neglect of such information usually leads to incorrect discovered models, especially when activities have decomposable cyclic dependencies. To address this problem, the recorded workflow relevant data and decision tree learning technique are utilized to classify cases into case clusters. Each case cluster contains causality and concurrency activity dependencies only. Then, a set of activity ordering relations are derived based on case clusters. And a synchronization-core-based process model is discovered from the ordering relations and composite cases. Finally, the discovered model is transformed to a BPMN model. The proposed approach is validated with a medical treatment process and an open event log. Meanwhile, a prototype system is presented.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "31", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2016:EAW, author = "Yashu Liu and Jie Wang and Jieping Ye", title = "An Efficient Algorithm For Weak Hierarchical Lasso", journal = j-TKDD, volume = "10", number = "3", pages = "32:1--32:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2791295", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Feb 25 05:56:34 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Linear regression is a widely used tool in data mining and machine learning. In many applications, fitting a regression model with only linear effects may not be sufficient for predictive or explanatory purposes. One strategy that has recently received increasing attention in statistics is to include feature interactions to capture the nonlinearity in the regression model. Such model has been applied successfully in many biomedical applications. One major challenge in the use of such model is that the data dimensionality is significantly higher than the original data, resulting in the small sample size large dimension problem. Recently, weak hierarchical Lasso, a sparse interaction regression model, is proposed that produces a sparse and hierarchical structured estimator by exploiting the Lasso penalty and a set of hierarchical constraints. However, the hierarchical constraints make it a non-convex problem and the existing method finds the solution to its convex relaxation, which needs additional conditions to guarantee the hierarchical structure. In this article, we propose to directly solve the non-convex weak hierarchical Lasso by making use of the General Iterative Shrinkage and Thresholding (GIST) optimization framework, which has been shown to be efficient for solving non-convex sparse formulations. 
The key step in GIST is to compute a sequence of proximal operators. One of our key technical contributions is to show that the proximal operator associated with the non-convex weak hierarchical Lasso admits a closed-form solution. However, a naive approach for solving each subproblem of the proximal operator leads to a quadratic time complexity, which is not desirable for large-size problems. We have conducted extensive experiments on both synthetic and real datasets. Results show that our proposed algorithm is much more efficient and effective than its convex relaxation. To this end, we further develop an efficient algorithm for computing the subproblems with a linearithmic time complexity. In addition, we extend the technique to perform the optimization-based hierarchical testing of pairwise interactions for binary classification problems, which is essentially the proximal operator associated with weak hierarchical Lasso. Simulation studies show that the non-convex hierarchical testing framework outperforms the convex relaxation when a hierarchical structure exists between main effects and interactions.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "32", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2016:ISI, author = "Wei Wang and Jure Leskovec", title = "Introduction to the Special Issue of Best Papers in {ACM SIGKDD 2014}", journal = j-TKDD, volume = "10", number = "4", pages = "33:1--33:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2936718", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Aug 29 07:28:29 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "33", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Xu:2016:PSP, author = "Silei Xu and John C. S. Lui", title = "Product Selection Problem: Improve Market Share by Learning Consumer Behavior", journal = j-TKDD, volume = "10", number = "4", pages = "34:1--34:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2753764", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Aug 29 07:28:29 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "It is often crucial for manufacturers to decide what products to produce so that they can increase their market share in an increasingly fierce market. To decide which products to produce, manufacturers need to analyze the consumers' requirements and how consumers make their purchase decisions so that the new products will be competitive in the market. In this paper, we first present a general distance-based product adoption model to capture consumers' purchase behavior. Using this model, various distance metrics can be used to describe different real life purchase behavior. We then provide a learning algorithm to decide which set of distance metrics one should use when we are given some accessible historical purchase data. Based on the product adoption model, we formalize the k most marketable products (or $k$-MMP) selection problem and formally prove that the problem is NP-hard. To tackle this problem, we propose an efficient greedy-based approximation algorithm with a provable solution guarantee. Using submodularity analysis, we prove that our approximation algorithm can achieve at least 63\% of the optimal solution. 
We apply our algorithm on both synthetic datasets and real-world datasets (TripAdvisor.com), and show that our algorithm can easily achieve five or more orders of speedup over the exhaustive search and achieve about 96\% of the optimal solution on average. Our experiments also demonstrate the robustness of our distance metric learning method, and illustrate how one can adopt it to improve the accuracy of product selection.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "34", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Jiang:2016:CSB, author = "Meng Jiang and Peng Cui and Alex Beutel and Christos Faloutsos and Shiqiang Yang", title = "Catching Synchronized Behaviors in Large Networks: a Graph Mining Approach", journal = j-TKDD, volume = "10", number = "4", pages = "35:1--35:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2746403", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Aug 29 07:28:29 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Given a directed graph of millions of nodes, how can we automatically spot anomalous, suspicious nodes judging only from their connectivity patterns? Suspicious graph patterns show up in many applications, from Twitter users who buy fake followers, manipulating the social network, to botnet members performing distributed denial of service attacks, disturbing the network traffic graph. 
We propose a fast and effective method, CatchSync, which exploits two of the tell-tale signs left in graphs by fraudsters: (a) synchronized behavior: suspicious nodes have extremely similar behavior patterns because they are often required to perform some task together (such as follow the same user); and (b) rare behavior: their connectivity patterns are very different from the majority. We introduce novel measures to quantify both concepts (``synchronicity'' and ``normality'') and we propose a parameter-free algorithm that works on the resulting synchronicity-normality plots. Thanks to careful design, CatchSync has the following desirable properties: (a) it is scalable to large datasets, being linear in the graph size; (b) it is parameter free; and (c) it is side-information-oblivious: it can operate using only the topology, without needing labeled data, nor timing information, and the like, while still capable of using side information if available. We applied CatchSync on three large, real datasets, 1-billion-edge Twitter social graph, 3-billion-edge, and 12-billion-edge Tencent Weibo social graphs, and several synthetic ones; CatchSync consistently outperforms existing competitors, both in detection accuracy by 36\% on Twitter and 20\% on Tencent Weibo, as well as in speed.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov.
Data", articleno = "35", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wei:2016:HTH, author = "Ying Wei and Yangqiu Song and Yi Zhen and Bo Liu and Qiang Yang", title = "Heterogeneous Translated Hashing: a Scalable Solution Towards Multi-Modal Similarity Search", journal = j-TKDD, volume = "10", number = "4", pages = "36:1--36:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2744204", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Aug 29 07:28:29 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/hash.bib; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Multi-modal similarity search has attracted considerable attention to meet the need of information retrieval across different types of media. To enable efficient multi-modal similarity search in large-scale databases recently, researchers start to study multi-modal hashing. Most of the existing methods are applied to search across multi-views among which explicit correspondence is provided. Given a multi-modal similarity search task, we observe that abundant multi-view data can be found on the Web which can serve as an auxiliary bridge. In this paper, we propose a Heterogeneous Translated Hashing (HTH) method with such auxiliary bridge incorporated not only to improve current multi-view search but also to enable similarity search across heterogeneous media which have no direct correspondence. HTH provides more flexible and discriminative ability by embedding heterogeneous media into different Hamming spaces, compared to almost all existing methods that map heterogeneous data in a common Hamming space. We formulate a joint optimization model to learn hash functions embedding heterogeneous media into different Hamming spaces, and a translator aligning different Hamming spaces. 
The extensive experiments on two real-world datasets, one publicly available dataset of Flickr, and the other MIRFLICKR-Yahoo Answers dataset, highlight the effectiveness and efficiency of our algorithm.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "36", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Tong:2016:GES, author = "Hanghang Tong and Fei Wang and Munmun De Choudhury and Zoran Obradovic", title = "Guest Editorial: Special Issue on Connected Health at Big Data Era {(BigChat)}: a {TKDD} Special Issue", journal = j-TKDD, volume = "10", number = "4", pages = "37:1--37:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2912122", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Aug 29 07:28:29 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "37", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Xiong:2016:KIT, author = "Feiyu Xiong and Moshe Kam and Leonid Hrebien and Beilun Wang and Yanjun Qi", title = "Kernelized Information-Theoretic Metric Learning for Cancer Diagnosis Using High-Dimensional Molecular Profiling Data", journal = j-TKDD, volume = "10", number = "4", pages = "38:1--38:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2789212", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Aug 29 07:28:29 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "With the advancement of genome-wide monitoring technologies, molecular expression data have become widely used for diagnosing cancer through tumor or blood samples. When mining molecular signature data, the process of comparing samples through an adaptive distance function is fundamental but difficult, as such datasets are normally heterogeneous and high dimensional. In this article, we present kernelized information-theoretic metric learning (KITML) algorithms that optimize a distance function to tackle the cancer diagnosis problem and scale to high dimensionality. By learning a nonlinear transformation in the input space implicitly through kernelization, KITML permits efficient optimization, low storage, and improved learning of distance metric. We propose two novel applications of KITML for diagnosing cancer using high-dimensional molecular profiling data: (1) for sample-level cancer diagnosis, the learned metric is used to improve the performance of $k$-nearest neighbor classification; and (2) for estimating the severity level or stage of a group of samples, we propose a novel set-based ranking approach to extend KITML.
For the sample-level cancer classification task, we have evaluated on 14 cancer gene microarray datasets and compared with eight other state-of-the-art approaches. The results show that our approach achieves the best overall performance for the task of molecular-expression-driven cancer sample diagnosis. For the group-level cancer stage estimation, we test the proposed set-KITML approach using three multi-stage cancer microarray datasets, and correctly estimated the stages of sample groups for all three studies.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "38", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yang:2016:JML, author = "Pei Yang and Hongxia Yang and Haoda Fu and Dawei Zhou and Jieping Ye and Theodoros Lappas and Jingrui He", title = "Jointly Modeling Label and Feature Heterogeneity in Medical Informatics", journal = j-TKDD, volume = "10", number = "4", pages = "39:1--39:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2768831", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Aug 29 07:28:29 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Multiple types of heterogeneity including label heterogeneity and feature heterogeneity often co-exist in many real-world data mining applications, such as diabetes treatment classification, gene functionality prediction, and brain image analysis. To effectively leverage such heterogeneity, in this article, we propose a novel graph-based model for Learning with both Label and Feature heterogeneity, namely L$^2$F. 
It models the label correlation by requiring that any two label-specific classifiers behave similarly on the same views if the associated labels are similar, and imposes the view consistency by requiring that view-based classifiers generate similar predictions on the same examples. The objective function for L$^2$F is jointly convex. To solve the optimization problem, we propose an iterative algorithm, which is guaranteed to converge to the global optimum. One appealing feature of L$^2$F is that it is capable of handling data with missing views and labels. Furthermore, we analyze its generalization performance based on Rademacher complexity, which sheds light on the benefits of jointly modeling the label and feature heterogeneity. Experimental results on various biomedical datasets show the effectiveness of the proposed approach.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "39", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wu:2016:MDN, author = "Yubao Wu and Xiaofeng Zhu and Li Li and Wei Fan and Ruoming Jin and Xiang Zhang", title = "Mining Dual Networks: Models, Algorithms, and Applications", journal = j-TKDD, volume = "10", number = "4", pages = "40:1--40:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2785970", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Aug 29 07:28:29 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Finding the densest subgraph in a single graph is a fundamental problem that has been extensively studied. In many emerging applications, there exist dual networks. For example, in genetics, it is important to use protein interactions to interpret genetic interactions. 
In this application, one network represents physical interactions among nodes, for example, protein--protein interactions, and another network represents conceptual interactions, for example, genetic interactions. Edges in the conceptual network are usually derived based on certain correlation measure or statistical test measuring the strength of the interaction. Two nodes with strong conceptual interaction may not have direct physical interaction. In this article, we propose the novel dual-network model and investigate the problem of finding the densest connected subgraph (DCS), which has the largest density in the conceptual network and is also connected in the physical network. Density in the conceptual network represents the average strength of the measured interacting signals among the set of nodes. Connectivity in the physical network shows how they interact physically. Such pattern cannot be identified using the existing algorithms for a single network. We show that even though finding the densest subgraph in a single network is polynomial time solvable, the DCS problem is NP-hard. We develop a two-step approach to solve the DCS problem. In the first step, we effectively prune the dual networks, while guarantee that the optimal solution is contained in the remaining networks. For the second step, we develop two efficient greedy methods based on different search strategies to find the DCS. Different variations of the DCS problem are also studied. We perform extensive experiments on a variety of real and synthetic dual networks to evaluate the effectiveness and efficiency of the developed methods.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "40", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Cui:2016:BOQ, author = "Licong Cui and Shiqiang Tao and Guo-Qiang Zhang", title = "Biomedical Ontology Quality Assurance Using a Big Data Approach", journal = j-TKDD, volume = "10", number = "4", pages = "41:1--41:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2768830", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Aug 29 07:28:29 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "This article presents recent progresses made in using scalable cloud computing environment, Hadoop and MapReduce, to perform ontology quality assurance (OQA), and points to areas of future opportunity. The standard sequential approach used for implementing OQA methods can take weeks if not months for exhaustive analyses for large biomedical ontological systems. With OQA methods newly implemented using massively parallel algorithms in the MapReduce framework, several orders of magnitude in speed-up can be achieved (e.g., from three months to three hours). Such dramatically reduced time makes it feasible not only to perform exhaustive structural analysis of large ontological hierarchies, but also to systematically track structural changes between versions for evolutional analysis. As an exemplar, progress is reported in using MapReduce to perform evolutional analysis and visualization on the Systemized Nomenclature of Medicine-Clinical Terms (SNOMED CT), a prominent clinical terminology system. Future opportunities in three areas are described: one is to extend the scope of MapReduce-based approach to existing OQA methods, especially for automated exhaustive structural analysis. 
The second is to apply our proposed MapReduce Pipeline for Lattice-based Evaluation (MaPLE) approach, demonstrated as an exemplar method for SNOMED CT, to other biomedical ontologies. The third area is to develop interfaces for reviewing results obtained by OQA methods and for visualizing ontological alignment and evolution, which can also take advantage of cloud computing technology to systematically pre-compute computationally intensive jobs in order to increase performance during user interactions with the visualization interface. Advances in these directions are expected to better support the ontological engineering lifecycle.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "41", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Rayana:2016:LMB, author = "Shebuti Rayana and Leman Akoglu", title = "Less is More: Building Selective Anomaly Ensembles", journal = j-TKDD, volume = "10", number = "4", pages = "42:1--42:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2890508", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Aug 29 07:28:29 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Ensemble learning for anomaly detection has been barely studied, due to difficulty in acquiring ground truth and the lack of inherent objective functions. In contrast, ensemble approaches for classification and clustering have been studied and effectively used for long. Our work taps into this gap and builds a new ensemble approach for anomaly detection, with application to event detection in temporal graphs as well as outlier detection in no-graph settings. It handles and combines multiple heterogeneous detectors to yield improved and robust performance. 
Importantly, trusting results from all the constituent detectors may deteriorate the overall performance of the ensemble, as some detectors could provide inaccurate results depending on the type of data in hand and the underlying assumptions of a detector. This suggests that combining the detectors selectively is key to building effective anomaly ensembles---hence ``less is more''. In this paper we propose a novel ensemble approach called SELECT for anomaly detection, which automatically and systematically selects the results from constituent detectors to combine in a fully unsupervised fashion. We apply our method to event detection in temporal graphs and outlier detection in multi-dimensional point data (no-graph), where SELECT successfully utilizes five base detectors and seven consensus methods under a unified ensemble framework. We provide extensive quantitative evaluation of our approach for event detection on five real-world datasets (four with ground truth events), including Enron email communications, RealityMining SMS and phone call records, New York Times news corpus, and World Cup 2014 Twitter news feed. We also provide results for outlier detection on seven real-world multi-dimensional point datasets from UCI Machine Learning Repository. Thanks to its selection mechanism, SELECT yields superior performance compared to the individual detectors alone, the full ensemble (naively combining all results), an existing diversity-based ensemble, and an existing weighted ensemble approach.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov.
Data", articleno = "42", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhu:2016:CCS, author = "Yada Zhu and Jingrui He", title = "Co-Clustering Structural Temporal Data with Applications to Semiconductor Manufacturing", journal = j-TKDD, volume = "10", number = "4", pages = "43:1--43:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2875427", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Aug 29 07:28:29 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Recent years have witnessed data explosion in semiconductor manufacturing due to advances in instrumentation and storage techniques. The large amount of data associated with process variables monitored over time form a rich reservoir of information, which can be used for a variety of purposes, such as anomaly detection, quality control, and fault diagnostics. In particular, following the same recipe for a certain Integrated Circuit device, multiple tools and chambers can be deployed for the production of this device, during which multiple time series can be collected, such as temperature, impedance, gas flow, electric bias, etc. These time series naturally fit into a two-dimensional array (matrix), i.e., each element in this array corresponds to a time series for one process variable from one chamber. To leverage the rich structural information in such temporal data, in this article, we propose a novel framework named C-Struts to simultaneously cluster on the two dimensions of this array. In this framework, we interpret the structural information as a set of constraints on the cluster membership, introduce an auxiliary probability distribution accordingly, and design an iterative algorithm to assign each time series to a certain cluster on each dimension. 
Furthermore, we establish the equivalence between C-Struts and a generic optimization problem, which is able to accommodate various distance functions. Extensive experiments on synthetic, benchmark, as well as manufacturing datasets demonstrate the effectiveness of the proposed method.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "43", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Tahani:2016:IDD, author = "Maryam Tahani and Ali M. A. Hemmatyar and Hamid R. Rabiee and Maryam Ramezani", title = "Inferring Dynamic Diffusion Networks in Online Media", journal = j-TKDD, volume = "10", number = "4", pages = "44:1--44:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2882968", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Aug 29 07:28:29 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Online media play an important role in information societies by providing a convenient infrastructure for different processes. Information diffusion that is a fundamental process taking place on social and information networks has been investigated in many studies. Research on information diffusion in these networks faces two main challenges: (1) In most cases, diffusion takes place on an underlying network, which is latent and its structure is unknown. (2) This latent network is not fixed and changes over time. In this article, we investigate the diffusion network extraction (DNE) problem when the underlying network is dynamic and latent. 
We model the diffusion behavior (existence probability) of each edge as a stochastic process and utilize the Hidden Markov Model (HMM) to discover the most probable diffusion links according to the current observation of the diffusion process, which is the infection time of nodes and the past diffusion behavior of links. We evaluate the performance of our Dynamic Diffusion Network Extraction (DDNE) method, on both synthetic and real datasets. Experimental results show that the performance of the proposed method is independent of the cascade transmission model and outperforms the state of art method in terms of F-measure.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "44", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Koh:2016:URP, author = "Yun Sing Koh and Sri Devi Ravana", title = "Unsupervised Rare Pattern Mining: a Survey", journal = j-TKDD, volume = "10", number = "4", pages = "45:1--45:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2898359", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Aug 29 07:28:29 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Association rule mining was first introduced to examine patterns among frequent items. The original motivation for seeking these rules arose from need to examine customer purchasing behaviour in supermarket transaction data. It seeks to identify combinations of items or itemsets, whose presence in a transaction affects the likelihood of the presence of another specific item or itemsets. In recent years, there has been an increasing demand for rare association rule mining. Detecting rare patterns in data is a vital task, with numerous high-impact applications including medical, finance, and security. 
This survey aims to provide a general, comprehensive, and structured overview of the state-of-the-art methods for rare pattern mining. We investigate the problems in finding rare rules using traditional association rule mining. As rare association rule mining has not been well explored, there is still specific groundwork that needs to be established. We will discuss some of the major issues in rare association rule mining and also look at current algorithms. As a contribution, we give a general framework for categorizing algorithms: Apriori and Tree based. We highlight the differences between these methods. Finally, we present several real-world application using rare pattern mining in diverse domains. We conclude our survey with a discussion on open and practical challenges in the field.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "45", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Cheng:2016:CFR, author = "Wei Cheng and Zhishan Guo and Xiang Zhang and Wei Wang", title = "{CGC}: a Flexible and Robust Approach to Integrating Co-Regularized Multi-Domain Graph for Clustering", journal = j-TKDD, volume = "10", number = "4", pages = "46:1--46:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2903147", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Aug 29 07:28:29 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Multi-view graph clustering aims to enhance clustering performance by integrating heterogeneous information collected in different domains. Each domain provides a different view of the data instances. Leveraging cross-domain information has been demonstrated an effective way to achieve better clustering results. 
Despite the previous success, existing multi-view graph clustering methods usually assume that different views are available for the same set of instances. Thus, instances in different domains can be treated as having strict one-to-one relationship. In many real-life applications, however, data instances in one domain may correspond to multiple instances in another domain. Moreover, relationships between instances in different domains may be associated with weights based on prior (partial) knowledge. In this article, we propose a flexible and robust framework, Co-regularized Graph Clustering (CGC), based on non-negative matrix factorization (NMF), to tackle these challenges. CGC has several advantages over the existing methods. First, it supports many-to-many cross-domain instance relationship. Second, it incorporates weight on cross-domain relationship. Third, it allows partial cross-domain mapping so that graphs in different domains may have different sizes. Finally, it provides users with the extent to which the cross-domain instance relationship violates the in-domain clustering structure, and thus enables users to re-evaluate the consistency of the relationship. We develop an efficient optimization method that guarantees to find the global optimal solution with a given confidence requirement. The proposed method can automatically identify noisy domains and assign smaller weights to them. This helps to obtain optimal graph partition for the focused domain. Extensive experimental results on UCI benchmark datasets, newsgroup datasets, and biological interaction networks demonstrate the effectiveness of our approach.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "46", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Shen:2016:SPO, author = "Chih-Ya Shen and De-Nian Yang and Wang-Chien Lee and Ming-Syan Chen", title = "Spatial-Proximity Optimization for Rapid Task Group Deployment", journal = j-TKDD, volume = "10", number = "4", pages = "47:1--47:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2818714", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Aug 29 07:28:29 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Spatial proximity is one of the most important factors for the quick deployment of the task groups in various time-sensitive missions. This article proposes a new spatial query, Spatio-Social Team Query (SSTQ), that forms a strong task group by considering (1) the group's spatial distance (i.e., transportation time), (2) skills of the candidate group members, and (3) social rapport among the candidates. Efficient processing of SSTQ is very challenging, because the aforementioned spatial, skill, and social factors need to be carefully examined. In this article, therefore, we first formulate two subproblems of SSTQ, namely Hop-Constrained Team Problem (HCTP) and Connection-Oriented Team Query (COTQ). HCTP is a decision problem that considers only social and skill dimensions. We prove that HCTP is NP-Complete. Moreover, based on the hardness of HCTP, we prove that SSTQ is NP-Hard and inapproximable within any factor. On the other hand, COTQ is a special case of SSTQ that relaxes the social constraint. We prove that COTQ is NP-Hard and propose an approximation algorithm for COTQ, namely COTprox. Furthermore, based on the observations on COTprox, we devise an approximation algorithm, SSTprox, with a guaranteed error bound for SSTQ. 
Finally, to efficiently obtain the optimal solution to SSTQ for small instances, we design two efficient algorithms, SpatialFirst and SkillFirst, with different scenarios in mind. These two algorithms incorporate various effective ordering and pruning techniques to reduce the search space for answering SSTQ. Experimental results on real datasets indicate that the proposed algorithms can efficiently answer SSTQ under various parameter settings.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "47", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yu:2016:FDV, author = "Zhiwen Yu and Zhitao Wang and Liming Chen and Bin Guo and Wenjie Li", title = "Featuring, Detecting, and Visualizing Human Sentiment in {Chinese} Micro-Blog", journal = j-TKDD, volume = "10", number = "4", pages = "48:1--48:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2821513", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Aug 29 07:28:29 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Micro-blog has been increasingly used for the public to express their opinions, and for organizations to detect public sentiment about social events or public policies. In this article, we examine and identify the key problems of this field, focusing particularly on the characteristics of innovative words, multi-media elements, and hierarchical structure of Chinese ``Weibo.'' Based on the analysis, we propose a novel approach and develop associated theoretical and technological methods to address these problems. 
These include a new sentiment word mining method based on three wording metrics and point-wise information, a rule set model for analyzing sentiment features of different linguistic components, and the corresponding methodology for calculating sentiment on multi-granularity considering emoticon elements as auxiliary affective factors. We evaluate our new word discovery and sentiment detection methods on a real-life Chinese micro-blog dataset. Initial results show that our new diction can improve sentiment detection, and they demonstrate that our multi-level rule set method is more effective, with the average accuracy being 10.2\% and 1.5\% higher than two existing methods for Chinese micro-blog sentiment analysis. In addition, we exploit visualization techniques to study the relationships between online sentiment and real life. The visualization of detected sentiment can help depict temporal patterns and spatial discrepancy.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "48", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chen:2016:EOL, author = "Chen Chen and Hanghang Tong and B. Aditya Prakash and Tina Eliassi-Rad and Michalis Faloutsos and Christos Faloutsos", title = "Eigen-Optimization on Large Graphs by Edge Manipulation", journal = j-TKDD, volume = "10", number = "4", pages = "49:1--49:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2903148", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Aug 29 07:28:29 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Large graphs are prevalent in many applications and enable a variety of information dissemination processes, e.g., meme, virus, and influence propagation. 
How can we optimize the underlying graph structure to affect the outcome of such dissemination processes in a desired way (e.g., stop a virus propagation, facilitate the propagation of a piece of good idea, etc)? Existing research suggests that the leading eigenvalue of the underlying graph is the key metric in determining the so-called epidemic threshold for a variety of dissemination models. In this paper, we study the problem of how to optimally place a set of edges (e.g., edge deletion and edge addition) to optimize the leading eigenvalue of the underlying graph, so that we can guide the dissemination process in a desired way. We propose effective, scalable algorithms for edge deletion and edge addition, respectively. In addition, we reveal the intrinsic relationship between edge deletion and node deletion problems. Experimental results validate the effectiveness and efficiency of the proposed algorithms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "49", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yu:2016:STR, author = "Zhiwen Yu and Miao Tian and Zhu Wang and Bin Guo and Tao Mei", title = "Shop-Type Recommendation Leveraging the Data from Social Media and Location-Based Services", journal = j-TKDD, volume = "11", number = "1", pages = "1:1--1:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2930671", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Aug 29 07:28:30 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "It is an important yet challenging task for investors to determine the most suitable type of shop (e.g., restaurant, fashion) for a newly opened store. 
Traditional ways are predominantly field surveys and empirical estimation, which are not effective as they lack shop-related data. As social media and location-based services (LBS) are becoming more and more pervasive, user-generated data from these platforms are providing rich information not only about individual consumption experiences, but also about shop attributes. In this paper, we investigate the recommendation of shop types for a given location, by leveraging heterogeneous data that are mainly historical user preferences and location context from social media and LBS. Our goal is to select the most suitable shop type, seeking to maximize the number of customers served from a candidate set of types. We propose a novel bias learning matrix factorization method with feature fusion for shop popularity prediction. Features are defined and extracted from two perspectives: location, where features are closely related to location characteristics, and commercial, where features are about the relationships between shops in the neighborhood. Experimental results show that the proposed method outperforms state-of-the-art solutions.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "1", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{McDowell:2016:LNA, author = "Luke K. McDowell and David W. 
Aha", title = "Leveraging Neighbor Attributes for Classification in Sparsely Labeled Networks", journal = j-TKDD, volume = "11", number = "1", pages = "2:1--2:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2898358", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Aug 29 07:28:30 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Many analysis tasks involve linked nodes, such as people connected by friendship links. Research on link-based classification (LBC) has studied how to leverage these connections to improve classification accuracy. Most such prior research has assumed the provision of a densely labeled training network. Instead, this article studies the common and challenging case when LBC must use a single sparsely labeled network for both learning and inference, a case where existing methods often yield poor accuracy. To address this challenge, we introduce a novel method that enables prediction via ``neighbor attributes,'' which were briefly considered by early LBC work but then abandoned due to perceived problems. We then explain, using both extensive experiments and loss decomposition analysis, how using neighbor attributes often significantly improves accuracy. We further show that using appropriate semi-supervised learning (SSL) is essential to obtaining the best accuracy in this domain and that the gains of neighbor attributes remain across a range of SSL choices and data conditions. Finally, given the challenges of label sparsity for LBC and the impact of neighbor attributes, we show that multiple previous studies must be re-considered, including studies regarding the best model features, the impact of noisy attributes, and strategies for active learning.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "2", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chang:2016:CSP, author = "Xiaojun Chang and Feiping Nie and Yi Yang and Chengqi Zhang and Heng Huang", title = "Convex Sparse {PCA} for Unsupervised Feature Learning", journal = j-TKDD, volume = "11", number = "1", pages = "3:1--3:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2910585", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Aug 29 07:28:30 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Principal component analysis (PCA) has been widely applied to dimensionality reduction and data pre-processing for different applications in engineering, biology, social science, and the like. Classical PCA and its variants seek for linear projections of the original variables to obtain the low-dimensional feature representations with maximal variance. One limitation is that it is difficult to interpret the results of PCA. Besides, the classical PCA is vulnerable to certain noisy data. In this paper, we propose a Convex Sparse Principal Component Analysis (CSPCA) algorithm and apply it to feature learning. First, we show that PCA can be formulated as a low-rank regression optimization problem. Based on the discussion, the $ l_{2, 1}$-norm minimization is incorporated into the objective function to make the regression coefficients sparse, thereby robust to the outliers. Also, based on the sparse model used in CSPCA, an optimal weight is assigned to each of the original feature, which in turn provides the output with good interpretability. With the output of our CSPCA, we can effectively analyze the importance of each feature under the PCA criteria. Our new objective function is convex, and we propose an iterative algorithm to optimize it. 
We apply the CSPCA algorithm to feature selection and conduct extensive experiments on seven benchmark datasets. Experimental results demonstrate that the proposed algorithm outperforms state-of-the-art unsupervised feature selection algorithms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "3", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wu:2016:LLR, author = "Ou Wu and Qiang You and Fen Xia and Lei Ma and Weiming Hu", title = "Listwise Learning to Rank from Crowds", journal = j-TKDD, volume = "11", number = "1", pages = "4:1--4:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2910586", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Aug 29 07:28:30 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Learning to rank has received great attention in recent years as it plays a crucial role in many applications such as information retrieval and data mining. The existing concept of learning to rank assumes that each training instance is associated with a reliable label. However, in practice, this assumption does not necessarily hold true as it may be infeasible or remarkably expensive to obtain reliable labels for many learning to rank applications. Therefore, a feasible approach is to collect labels from crowds and then learn a ranking function from crowdsourcing labels. This study explores the listwise learning to rank with crowdsourcing labels obtained from multiple annotators, who may be unreliable. A new probabilistic ranking model is first proposed by combining two existing models. Subsequently, a ranking function is trained by proposing a maximum likelihood learning approach, which estimates ground-truth labels and annotator expertise, and trains the ranking function iteratively. 
In practical crowdsourcing machine learning, valuable side information (e.g., professional grades) about involved annotators is normally attainable. Therefore, this study also investigates learning to rank from crowd labels when side information on the expertise of involved annotators is available. In particular, three basic types of side information are investigated, and corresponding learning algorithms are consequently introduced. Further, the top-k learning to rank from crowdsourcing labels are explored to deal with long training ranking lists. The proposed algorithms are tested on both synthetic and real-world data. Results reveal that the maximum likelihood estimation approach significantly outperforms the average approach and existing crowdsourcing regression methods. The performances of the proposed algorithms are comparable to those of the learning model in consideration reliable labels. The results of the investigation further indicate that side information is helpful in inferring both ranking functions and expertise degrees of annotators.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "4", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Shao:2016:SCI, author = "Junming Shao and Qinli Yang and Hoang-Vu Dang and Bertil Schmidt and Stefan Kramer", title = "Scalable Clustering by Iterative Partitioning and Point Attractor Representation", journal = j-TKDD, volume = "11", number = "1", pages = "5:1--5:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2934688", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Aug 29 07:28:30 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Clustering very large datasets while preserving cluster quality remains a challenging data-mining task to date. 
In this paper, we propose an effective scalable clustering algorithm for large datasets that builds upon the concept of synchronization. Inherited from the powerful concept of synchronization, the proposed algorithm, CIPA (Clustering by Iterative Partitioning and Point Attractor Representations), is capable of handling very large datasets by iteratively partitioning them into thousands of subsets and clustering each subset separately. Using dynamic clustering by synchronization, each subset is then represented by a set of point attractors and outliers. Finally, CIPA identifies the cluster structure of the original dataset by clustering the newly generated dataset consisting of points attractors and outliers from all subsets. We demonstrate that our new scalable clustering approach has several attractive benefits: (a) CIPA faithfully captures the cluster structure of the original data by performing clustering on each separate data iteratively instead of using any sampling or statistical summarization technique. (b) It allows clustering very large datasets efficiently with high cluster quality. (c) CIPA is parallelizable and also suitable for distributed data. Extensive experiments demonstrate the effectiveness and efficiency of our approach.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "5", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Grabocka:2016:LTS, author = "Josif Grabocka and Nicolas Schilling and Lars Schmidt-Thieme", title = "Latent Time-Series Motifs", journal = j-TKDD, volume = "11", number = "1", pages = "6:1--6:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2940329", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Aug 29 07:28:30 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Motifs are the most repetitive/frequent patterns of a time-series. The discovery of motifs is crucial for practitioners in order to understand and interpret the phenomena occurring in sequential data. Currently, motifs are searched among series sub-sequences, aiming at selecting the most frequently occurring ones. Search-based methods, which try out series sub-sequence as motif candidates, are currently believed to be the best methods in finding the most frequent patterns. However, this paper proposes an entirely new perspective in finding motifs. We demonstrate that searching is non-optimal since the domain of motifs is restricted, and instead we propose a principled optimization approach able to find optimal motifs. We treat the occurrence frequency as a function and time-series motifs as its parameters, therefore we learn the optimal motifs that maximize the frequency function. In contrast to searching, our method is able to discover the most repetitive patterns (hence optimal), even in cases where they do not explicitly occur as sub-sequences. Experiments on several real-life time-series datasets show that the motifs found by our method are highly more frequent than the ones found through searching, for exactly the same distance threshold.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. 
Discov. Data", articleno = "6", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2016:SNE, author = "Xianchao Zhang and Linlin Zong and Quanzeng You and Xing Yong", title = "Sampling for {Nystr{\"o}m} Extension-Based Spectral Clustering: Incremental Perspective and Novel Analysis", journal = j-TKDD, volume = "11", number = "1", pages = "7:1--7:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2934693", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Aug 29 07:28:30 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Sampling is the key aspect for Nystr{\"o}m extension based spectral clustering. Traditional sampling schemes select the set of landmark points on a whole and focus on how to lower the matrix approximation error. However, the matrix approximation error does not have direct impact on the clustering performance. In this article, we propose a sampling framework from an incremental perspective, i.e., the landmark points are selected one by one, and each next point to be sampled is determined by previously selected landmark points. Incremental sampling builds explicit relationships among landmark points; thus, they work together well and provide a theoretical guarantee on the clustering performance. We provide two novel analysis methods and propose two schemes for selecting-the-next-one of the framework. The first scheme is based on clusterability analysis, which provides a better guarantee on clustering performance than schemes based on matrix approximation error analysis. The second scheme is based on loss analysis, which provides maximized predictive ability of the landmark points on the (implicit) labels of the unsampled points. 
Experimental results on a wide range of benchmark datasets demonstrate the superiorities of our proposed incremental sampling schemes over existing sampling schemes.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "7", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Qiao:2016:FST, author = "Maoying Qiao and Richard Yi Da Xu and Wei Bian and Dacheng Tao", title = "Fast Sampling for Time-Varying Determinantal Point Processes", journal = j-TKDD, volume = "11", number = "1", pages = "8:1--8:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2943785", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Aug 29 07:28:30 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Determinantal Point Processes (DPPs) are stochastic models which assign each subset of a base dataset with a probability proportional to the subset's degree of diversity. It has been shown that DPPs are particularly appropriate in data subset selection and summarization (e.g., news display, video summarizations). DPPs prefer diverse subsets while other conventional models cannot offer. However, DPPs inference algorithms have a polynomial time complexity which makes it difficult to handle large and time-varying datasets, especially when real-time processing is required. To address this limitation, we developed a fast sampling algorithm for DPPs which takes advantage of the nature of some time-varying data (e.g., news corpora updating, communication network evolving), where the data changes between time stamps are relatively small. The proposed algorithm is built upon the simplification of marginal density functions over successive time stamps and the sequential Monte Carlo (SMC) sampling technique. 
Evaluations on both a real-world news dataset and the Enron Corpus confirm the efficiency of the proposed algorithm.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "8", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Crescenzi:2016:GIO, author = "Pierluigi Crescenzi and Gianlorenzo D'Angelo and Lorenzo Severini and Yllka Velaj", title = "Greedily Improving Our Own Closeness Centrality in a Network", journal = j-TKDD, volume = "11", number = "1", pages = "9:1--9:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2953882", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Aug 29 07:28:30 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "The closeness centrality is a well-known measure of importance of a vertex within a given complex network. Having high closeness centrality can have positive impact on the vertex itself: hence, in this paper we consider the optimization problem of determining how much a vertex can increase its centrality by creating a limited amount of new edges incident to it. We will consider both the undirected and the directed graph cases. In both cases, we first prove that the optimization problem does not admit a polynomial-time approximation scheme (unless P = NP), and then propose a greedy approximation algorithm (with an almost tight approximation ratio), whose performance is then tested on synthetic graphs and real-world networks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "9", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2016:CBN, author = "Xiang Li and Charles X.
Ling and Huaimin Wang", title = "The Convergence Behavior of Naive {Bayes} on Large Sparse Datasets", journal = j-TKDD, volume = "11", number = "1", pages = "10:1--10:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2948068", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Aug 29 07:28:30 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Large and sparse datasets with a lot of missing values are common in the big data era, such as user behaviors over a large number of items. Classification in such datasets is an important topic for machine learning and data mining. Practically, naive Bayes is still a popular classification algorithm for large sparse datasets, as its time and space complexity scales linearly with the size of non-missing values. However, several important questions about the behavior of naive Bayes are yet to be answered. For example, how different mechanisms of data missing, data sparsity, and the number of attributes systematically affect the learning curves and convergence? In this paper, we address several common data missing mechanisms and propose novel data generation methods based on these mechanisms. We generate large and sparse data systematically, and study the entire AUC (Area Under ROC Curve) learning curve and convergence behavior of naive Bayes. We not only have several important experiment observations, but also provide detailed theoretic studies. Finally, we summarize our empirical and theoretic results as an intuitive decision flowchart and a useful guideline for classifying large sparse datasets in practice.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "10", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Fu:2016:MGD, author = "Yanjie Fu and Hui Xiong and Yong Ge and Yu Zheng and Zijun Yao and Zhi-Hua Zhou", title = "Modeling of Geographic Dependencies for Real Estate Ranking", journal = j-TKDD, volume = "11", number = "1", pages = "11:1--11:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2934692", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Aug 29 07:28:30 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "It is traditionally a challenge for home buyers to understand, compare, and contrast the investment value of real estate. Although a number of appraisal methods have been developed to value real properties, the performances of these methods have been limited by traditional data sources for real estate appraisal. With the development of new ways of collecting estate-related mobile data, there is a potential to leverage geographic dependencies of real estate for enhancing real estate appraisal. Indeed, the geographic dependencies of the investment value of an estate can be from the characteristics of its own neighborhood (individual), the values of its nearby estates (peer), and the prosperity of the affiliated latent business area (zone). To this end, in this paper, we propose a geographic method, named ClusRanking, for real estate appraisal by leveraging the mutual enforcement of ranking and clustering power. ClusRanking is able to exploit geographic individual, peer, and zone dependencies in a probabilistic ranking model. Specifically, we first extract the geographic utility of estates from geography data, estimate the neighborhood popularity of estates by mining taxicab trajectory data, and model the influence of latent business areas. 
Also, we fuse these three influential factors and predict real estate investment value. Moreover, we simultaneously consider individual, peer and zone dependencies, and derive an estate-specific ranking likelihood as the objective function. Furthermore, we propose an improved method named CR-ClusRanking by incorporating checkin information as a regularization term which reduces the performance volatility of real estate ranking system. Finally, we conduct a comprehensive evaluation with the real estate-related data of Beijing, and the experimental results demonstrate the effectiveness of our proposed methods.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "11", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Gao:2016:DAC, author = "Zekai J. Gao and Chris Jermaine", title = "Distributed Algorithms for Computing Very Large Thresholded Covariance Matrices", journal = j-TKDD, volume = "11", number = "2", pages = "12:1--12:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2935750", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Dec 26 17:17:00 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Computation of covariance matrices from observed data is an important problem, as such matrices are used in applications such as principal component analysis (PCA), linear discriminant analysis (LDA), and increasingly in the learning and application of probabilistic graphical models. However, computing an empirical covariance matrix is not always an easy problem. There are two key difficulties associated with computing such a matrix from a very high-dimensional dataset. The first problem is over-fitting. 
For a $p$-dimensional covariance matrix, there are $ p(p - 1) / 2$ unique, off-diagonal entries in the empirical covariance matrix $S$; for large $p$ (say, $ p > 10^5$), the size $n$ of the dataset is often much smaller than the number of covariances to compute. Over-fitting is a concern in any situation in which the number of parameters learned can greatly exceed the size of the dataset. Thus, there are strong theoretical reasons to expect that for high-dimensional data---even Gaussian data---the empirical covariance matrix is not a good estimate for the true covariance matrix underlying the generative process. The second problem is computational. Computing a covariance matrix takes $ O(n p^2)$ time. For large $p$ (greater than 10,000) and $n$ much greater than $p$, this is debilitating. In this article, we consider how both of these difficulties can be handled simultaneously. Specifically, a key regularization technique for high-dimensional covariance estimation is thresholding, in which the smallest or least significant entries in the covariance matrix are simply dropped and replaced with the value $0$. This suggests an obvious way to address the computational difficulty as well: First, compute the identities of the $K$ entries in the covariance matrix that are actually important in the sense that they will not be removed during thresholding, and then in a second step, compute the values of those entries. This can be done in $ O(K n)$ time. If $ K \ll p^2$ and the identities of the important entries can be computed in reasonable time, then this is a big win. The key technical contribution of this article is the design and implementation of two different distributed algorithms for approximating the identities of the important entries quickly, using sampling. We have implemented these methods and tested them using an 800-core compute cluster. Experiments have been run using real datasets having millions of data points and up to 40,000 dimensions. 
These experiments show that the proposed methods are both accurate and efficient.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "12", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2016:WKI, author = "Chenguang Wang and Yangqiu Song and Dan Roth and Ming Zhang and Jiawei Han", title = "World Knowledge as Indirect Supervision for Document Clustering", journal = j-TKDD, volume = "11", number = "2", pages = "13:1--13:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2953881", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Dec 26 17:17:00 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "One of the key obstacles in making learning protocols realistic in applications is the need to supervise them, a costly process that often requires hiring domain experts. We consider the framework to use the world knowledge as indirect supervision. World knowledge is general-purpose knowledge, which is not designed for any specific domain. Then, the key challenges are how to adapt the world knowledge to domains and how to represent it for learning. In this article, we provide an example of using world knowledge for domain-dependent document clustering. We provide three ways to specify the world knowledge to domains by resolving the ambiguity of the entities and their types, and represent the data with world knowledge as a heterogeneous information network. Then, we propose a clustering algorithm that can cluster multiple types and incorporate the sub-type information as constraints. In the experiments, we use two existing knowledge bases as our sources of world knowledge. One is Freebase, which is collaboratively collected knowledge about entities and their organizations. 
The other is YAGO2, a knowledge base automatically extracted from Wikipedia and maps knowledge to the linguistic knowledge base, WordNet. Experimental results on two text benchmark datasets (20newsgroups and RCV1) show that incorporating world knowledge as indirect supervision can significantly outperform the state-of-the-art clustering algorithms as well as clustering algorithms enhanced with world knowledge features. A preliminary version of this work appeared in the proceedings of KDD 2015 [Wang et al. 2015a]. This journal version has made several major improvements. First, we have proposed a new and general learning framework for machine learning with world knowledge as indirect supervision, where document clustering is a special case in the original paper. Second, in order to make our unsupervised semantic parsing method more understandable, we add several real cases from the original sentences to the resulting logic forms with all the necessary information. Third, we add details of the three semantic filtering methods and conduct deep analysis of the three semantic filters, by using case studies to show why the conceptualization-based semantic filter can produce more accurate indirect supervision. Finally, in addition to the experiment on 20 newsgroup data and Freebase, we have extended the experiments on clustering results by using all the combinations of text (20 newsgroup, MCAT, CCAT, ECAT) and world knowledge sources (Freebase, YAGO2).", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "13", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chakraborty:2016:PCS, author = "Tanmoy Chakraborty and Sriram Srinivasan and Niloy Ganguly and Animesh Mukherjee and Sanjukta Bhowmick", title = "Permanence and Community Structure in Complex Networks", journal = j-TKDD, volume = "11", number = "2", pages = "14:1--14:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2953883", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Dec 26 17:17:00 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "The goal of community detection algorithms is to identify densely connected units within large networks. An implicit assumption is that all the constituent nodes belong equally to their associated community. However, some nodes are more important in the community than others. To date, efforts have been primarily made to identify communities as a whole, rather than understanding to what extent an individual node belongs to its community. Therefore, most metrics for evaluating communities, for example modularity, are global. These metrics produce a score for each community, not for each individual node. In this article, we argue that the belongingness of nodes in a community is not uniform. We quantify the degree of belongingness of a vertex within a community by a new vertex-based metric called permanence. The central idea of permanence is based on the observation that the strength of membership of a vertex to a community depends upon two factors (i) the extent of connections of the vertex within its community versus outside its community, and (ii) how tightly the vertex is connected internally. We present the formulation of permanence based on these two quantities. 
We demonstrate that compared to other existing metrics (such as modularity, conductance, and cut-ratio), the change in permanence is more commensurate to the level of perturbation in ground-truth communities. We discuss how permanence can help us understand and utilize the structure and evolution of communities by demonstrating that it can be used to --- (i) measure the persistence of a vertex in a community, (ii) design strategies to strengthen the community structure, (iii) explore the core-periphery structure within a community, and (iv) select suitable initiators for message spreading. We further show that permanence is an excellent metric for identifying communities. We demonstrate that the process of maximizing permanence (abbreviated as MaxPerm) produces meaningful communities that concur with the ground-truth community structure of the networks more accurately than eight other popular community detection algorithms. Finally, we provide mathematical proofs to demonstrate the correctness of finding communities by maximizing permanence. In particular, we show that the communities obtained by this method are (i) less affected by the changes in vertex ordering, and (ii) more resilient to resolution limit, degeneracy of solutions, and asymptotic growth of values.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "14", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Smith:2016:PNN, author = "Laura M. Smith and Linhong Zhu and Kristina Lerman and Allon G. 
Percus", title = "Partitioning Networks with Node Attributes by Compressing Information Flow", journal = j-TKDD, volume = "11", number = "2", pages = "15:1--15:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2968451", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Dec 26 17:17:00 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Real-world networks are often organized as modules or communities of similar nodes that serve as functional units. These networks are also rich in content, with nodes having distinguished features or attributes. In order to discover a network's modular structure, it is necessary to take into account not only its links but also node attributes. We describe an information-theoretic method that identifies modules by compressing descriptions of information flow on a network. Our formulation introduces node content into the description of information flow, which we then minimize to discover groups of nodes with similar attributes that also tend to trap the flow of information. The method is conceptually simple and does not require ad-hoc parameters to specify the number of modules or to control the relative contribution of links and node attributes to network structure. We apply the proposed method to partition real-world networks with known community structure. We demonstrate that adding node attributes helps recover the underlying community structure in content-rich networks more effectively than using links alone. In addition, we show that our method is faster and more accurate than alternative state-of-the-art algorithms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "15", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yu:2016:SAO, author = "Kui Yu and Xindong Wu and Wei Ding and Jian Pei", title = "Scalable and Accurate Online Feature Selection for Big Data", journal = j-TKDD, volume = "11", number = "2", pages = "16:1--16:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2976744", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Dec 26 17:17:00 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Feature selection is important in many big data applications. Two critical challenges closely associate with big data. First, in many big data applications, the dimensionality is extremely high, in millions, and keeps growing. Second, big data applications call for highly scalable feature selection algorithms in an online manner such that each feature can be processed in a sequential scan. We present SAOLA, a {Scalable and Accurate On Line Approach} for feature selection in this paper. With a theoretical analysis on bounds of the pairwise correlations between features, SAOLA employs novel pairwise comparison techniques and maintains a parsimonious model over time in an online manner. Furthermore, to deal with upcoming features that arrive by groups, we extend the SAOLA algorithm, and then propose a new group-SAOLA algorithm for online group feature selection. The group-SAOLA algorithm can online maintain a set of feature groups that is sparse at the levels of both groups and individual features simultaneously. 
An empirical study using a series of benchmark real datasets shows that our two algorithms, SAOLA and group-SAOLA, are scalable on datasets of extremely high dimensionality and have superior performance over the state-of-the-art feature selection methods.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "16", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2016:SAU, author = "Bin Liu and Yao Wu and Neil Zhenqiang Gong and Junjie Wu and Hui Xiong and Martin Ester", title = "Structural Analysis of User Choices for Mobile App Recommendation", journal = j-TKDD, volume = "11", number = "2", pages = "17:1--17:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2983533", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Dec 26 17:17:00 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Advances in smartphone technology have promoted the rapid development of mobile apps. However, the availability of a huge number of mobile apps in application stores has imposed the challenge of finding the right apps to meet the user needs. Indeed, there is a critical demand for personalized app recommendations. Along this line, there are opportunities and challenges posed by two unique characteristics of mobile apps. First, app markets have organized apps in a hierarchical taxonomy. Second, apps with similar functionalities are competing with each other. Although there are a variety of approaches for mobile app recommendations, these approaches do not have a focus on dealing with these opportunities and challenges. To this end, in this article, we provide a systematic study for addressing these challenges. 
Specifically, we develop a structural user choice model (SUCM) to learn fine-grained user preferences by exploiting the hierarchical taxonomy of apps as well as the competitive relationships among apps. Moreover, we design an efficient learning algorithm to estimate the parameters for the SUCM model. Finally, we perform extensive experiments on a large app adoption dataset collected from Google Play. The results show that SUCM consistently outperforms state-of-the-art Top-N recommendation methods by a significant margin.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "17", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Afrati:2016:APD, author = "Foto Afrati and Shlomi Dolev and Ephraim Korach and Shantanu Sharma and Jeffrey D. Ullman", title = "Assignment Problems of Different-Sized Inputs in {MapReduce}", journal = j-TKDD, volume = "11", number = "2", pages = "18:1--18:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2987376", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Dec 26 17:17:00 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "A MapReduce algorithm can be described by a mapping schema, which assigns inputs to a set of reducers, such that for each required output there exists a reducer that receives all the inputs participating in the computation of this output. Reducers have a capacity that limits the sets of inputs they can be assigned. However, individual inputs may vary in terms of size. We consider, for the first time, mapping schemas where input sizes are part of the considerations and restrictions. One of the significant parameters to optimize in any MapReduce job is communication cost between the map and reduce phases. 
The communication cost can be optimized by minimizing the number of copies of inputs sent to the reducers. The communication cost is closely related to the number of reducers of constrained capacity that are used to accommodate appropriately the inputs, so that the requirement of how the inputs must meet in a reducer is satisfied. In this work, we consider a family of problems where it is required that each input meets with each other input in at least one reducer. We also consider a slightly different family of problems in which each input of a list, X, is required to meet each input of another list, Y, in at least one reducer. We prove that finding an optimal mapping schema for these families of problems is NP-hard, and present a bin-packing-based approximation algorithm for finding a near optimal mapping schema.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "18", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2016:UHM, author = "Zhongyuan Wang and Fang Wang and Haixun Wang and Zhirui Hu and Jun Yan and Fangtao Li and Ji-Rong Wen and Zhoujun Li", title = "Unsupervised Head-Modifier Detection in Search Queries", journal = j-TKDD, volume = "11", number = "2", pages = "19:1--19:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2988235", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Dec 26 17:17:00 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Interpreting the user intent in search queries is a key task in query understanding. Query intent classification has been widely studied. In this article, we go one step further to understand the query from the view of head-modifier analysis. 
For example, given the query ``popular iphone 5 smart cover,'' instead of using coarse-grained semantic classes (e.g., find electronic product), we interpret that ``smart cover'' is the head or the intent of the query and ``iphone 5'' is its modifier. Query head-modifier detection can help search engines to obtain particularly relevant content, which is also important for applications such as ads matching and query recommendation. We introduce an unsupervised semantic approach for query head-modifier detection. First, we mine a large number of instance level head-modifier pairs from search log. Then, we develop a conceptualization mechanism to generalize the instance level pairs to concept level. Finally, we derive weighted concept patterns that are concise, accurate, and have strong generalization power in head-modifier detection. The developed mechanism has been used in production for search relevance and ads matching. We use extensive experiment results to demonstrate the effectiveness of our approach.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "19", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chang:2016:LMB, author = "Yi Chang and Makoto Yamada and Antonio Ortega and Yan Liu", title = "Lifecycle Modeling for Buzz Temporal Pattern Discovery", journal = j-TKDD, volume = "11", number = "2", pages = "20:1--20:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2994605", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Dec 26 17:17:00 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "In social media analysis, one critical task is detecting a burst of topics or buzz, which is reflected by extremely frequent mentions of certain keywords in a short-time interval. 
Detecting buzz not only provides useful insights into the information propagation mechanism, but also plays an essential role in preventing malicious rumors. However, buzz modeling is a challenging task because a buzz time-series often exhibits sudden spikes and heavy tails, wherein most existing time-series models fail. In this article, we propose novel buzz modeling approaches that capture the rise and fade temporal patterns via Product Lifecycle (PLC) model, a classical concept in economics. More specifically, we propose to model multiple peaks in buzz time-series with PLC mixture or PLC group mixture and develop a probabilistic graphical model (K-Mixture of Product Lifecycle) (K-MPLC) to automatically discover inherent lifecycle patterns within a collection of buzzes. Furthermore, we effectively utilize the model parameters of PLC mixture or PLC group mixture for burst prediction. Our experimental results show that our proposed methods significantly outperform existing leading approaches on buzz clustering and buzz-type prediction.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "20", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wei:2016:NBG, author = "Qiang Wei and Dandan Qiao and Jin Zhang and Guoqing Chen and Xunhua Guo", title = "A Novel Bipartite Graph Based Competitiveness Degree Analysis from Query Logs", journal = j-TKDD, volume = "11", number = "2", pages = "21:1--21:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2996196", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Dec 26 17:17:00 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Competitiveness degree analysis is a focal point of business strategy and competitive intelligence, aimed to help managers closely monitor to what extent their rivals are competing with them. This article proposes a novel method, namely BCQ, to measure the competitiveness degree between peers from query logs as an important form of user generated contents, which reflects the ``wisdom of crowds'' from the search engine users' perspective. In doing so, a bipartite graph model is developed to capture the competitive relationships through conjoint attributes hidden in query logs, where the notion of competitiveness degree for entity pairs is introduced, and then used to identify the competitive paths mapped in the bipartite graph. Subsequently, extensive experiments are conducted to demonstrate the effectiveness of BCQ to quantify the competitiveness degrees. Experimental results reveal that BCQ can well support competitors ranking, which is helpful for devising competitive strategies and pursuing market performance. In addition, efficiency experiments on synthetic data show a good scalability of BCQ on large scale of query logs.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "21", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Pei:2016:CCP, author = "Yuanli Pei and Xiaoli Z. Fern and Teresa Vania Tjahja and R{\'o}mer Rosales", title = "Comparing Clustering with Pairwise and Relative Constraints: a Unified Framework", journal = j-TKDD, volume = "11", number = "2", pages = "22:1--22:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2996467", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Dec 26 17:17:00 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Clustering can be improved with the help of side information about the similarity relationships among instances. Such information has been commonly represented by two types of constraints: pairwise constraints and relative constraints, regarding similarities about instance pairs and triplets, respectively. Prior work has mostly considered these two types of constraints separately and developed individual algorithms to learn from each type. In practice, however, it is critical to understand/compare the usefulness of the two types of constraints as well as the cost of acquiring them, which has not been studied before. This paper provides an extensive comparison of clustering with these two types of constraints. Specifically, we compare their impacts both on human users that provide such constraints and on the learning system that incorporates such constraints into clustering. In addition, to ensure that the comparison of clustering is performed on equal ground (without the potential bias introduced by different learning algorithms), we propose a probabilistic semi-supervised clustering framework that can learn from either type of constraints. 
Our experiments demonstrate that the proposed semi-supervised clustering framework is highly effective at utilizing both types of constraints to aid clustering. Our user study provides valuable insights regarding the impact of the constraints on human users, and our experiments on clustering with the human-labeled constraints reveal that relative constraint is often more efficient at improving clustering.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "22", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Lorenzetti:2016:MTS, author = "Carlos Lorenzetti and Ana Maguitman and David Leake and Filippo Menczer and Thomas Reichherzer", title = "Mining for Topics to Suggest Knowledge Model Extensions", journal = j-TKDD, volume = "11", number = "2", pages = "23:1--23:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2997657", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Dec 26 17:17:00 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Electronic concept maps, interlinked with other concept maps and multimedia resources, can provide rich knowledge models to capture and share human knowledge. This article presents and evaluates methods to support experts as they extend existing knowledge models, by suggesting new context-relevant topics mined from Web search engines. The task of generating topics to support knowledge model extension raises two research questions: first, how to extract topic descriptors and discriminators from concept maps; and second, how to use these topic descriptors and discriminators to identify candidate topics on the Web with the right balance of novelty and relevance. 
To address these questions, this article first develops the theoretical framework required for a ``topic suggester'' to aid information search in the context of a knowledge model under construction. It then presents and evaluates algorithms based on this framework and applied in Extender, an implemented tool for topic suggestion. Extender has been developed and tested within CmapTools, a widely used system for supporting knowledge modeling using concept maps. However, the generality of the algorithms makes them applicable to a broad class of knowledge modeling systems, and to Web search in general.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "23", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Kumar:2016:ACT, author = "Dheeraj Kumar and James C. Bezdek and Sutharshan Rajasegarar and Marimuthu Palaniswami and Christopher Leckie and Jeffrey Chan and Jayavardhana Gubbi", title = "Adaptive Cluster Tendency Visualization and Anomaly Detection for Streaming Data", journal = j-TKDD, volume = "11", number = "2", pages = "24:1--24:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2997656", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Dec 26 17:17:00 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "The growth in pervasive network infrastructure called the Internet of Things (IoT) enables a wide range of physical objects and environments to be monitored in fine spatial and temporal detail. The detailed, dynamic data that are collected in large quantities from sensor devices provide the basis for a variety of applications. Automatic interpretation of these evolving large data is required for timely detection of interesting events. 
This article develops and exemplifies two new relatives of the visual assessment of tendency (VAT) and improved visual assessment of tendency (iVAT) models, which uses cluster heat maps to visualize structure in static datasets. One new model is initialized with a static VAT/iVAT image, and then incrementally (hence inc-VAT/inc-iVAT) updates the current minimal spanning tree (MST) used by VAT with an efficient edge insertion scheme. Similarly, dec-VAT/dec-iVAT efficiently removes a node from the current VAT MST. A sequence of inc-iVAT/dec-iVAT images can be used for (visual) anomaly detection in evolving data streams and for sliding window based cluster assessment for time series data. The method is illustrated with four real datasets (three of them being smart city IoT data). The evaluation demonstrates the algorithms' ability to successfully isolate anomalies and visualize changing cluster structure in the streaming data.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "24", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhu:2016:EVM, author = "Wen-Yuan Zhu and Wen-Chih Peng and Ling-Jyh Chen and Kai Zheng and Xiaofang Zhou", title = "Exploiting Viral Marketing for Location Promotion in Location-Based Social Networks", journal = j-TKDD, volume = "11", number = "2", pages = "25:1--25:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/3001938", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Dec 26 17:17:00 MST 2016", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "With the explosion of smartphones and social network services, location-based social networks (LBSNs) are increasingly seen as tools for businesses (e.g., restaurants and hotels) to promote their products and services. 
In this article, we investigate the key techniques that can help businesses promote their locations by advertising wisely through the underlying LBSNs. In order to maximize the benefit of location promotion, we formalize it as an influence maximization problem in an LBSN, i.e., given a target location and an LBSN, a set of k users (called seeds) should be advertised initially such that they can successfully propagate and attract many other users to visit the target location. Existing studies have proposed different ways to calculate the information propagation probability, that is, how likely it is that a user may influence another, in the setting of a static social network. However, it is more challenging to derive the propagation probability in an LBSN since it is heavily affected by the target location and the user mobility, both of which are dynamic and query dependent. This article proposes two user mobility models, namely the Gaussian-based and distance-based mobility models, to capture the check-in behavior of individual LBSN users, based on which location-aware propagation probabilities can be derived. Extensive experiments based on two real LBSN datasets have demonstrated the superior effectiveness of our proposals compared with existing static models of propagation probabilities to truly reflect the information propagation in LBSNs.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "25", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Sariyuce:2017:GMF, author = "Ahmet Erdem Sariy{\"u}ce and Kamer Kaya and Erik Saule and {\"U}mit V. 
{\c{C}}ataly{\"u}rek", title = "Graph Manipulations for Fast Centrality Computation", journal = j-TKDD, volume = "11", number = "3", pages = "26:1--26:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3022668", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 24 17:32:52 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "The betweenness and closeness metrics are widely used metrics in many network analysis applications. Yet, they are expensive to compute. For that reason, making the betweenness and closeness centrality computations faster is an important and well-studied problem. In this work, we propose the framework BADIOS that manipulates the graph by compressing it and splitting into pieces so that the centrality computation can be handled independently for each piece. Experimental results show that the proposed techniques can be a great arsenal to reduce the centrality computation time for various types and sizes of networks. In particular, it reduces the betweenness centrality computation time of a 4.6 million edges graph from more than 5 days to less than 16 hours. For the same graph, the closeness computation time is decreased from more than 3 days to 6 hours (12.7x speedup).", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "26", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Rozenshtein:2017:FDD, author = "Polina Rozenshtein and Nikolaj Tatti and Aristides Gionis", title = "Finding Dynamic Dense Subgraphs", journal = j-TKDD, volume = "11", number = "3", pages = "27:1--27:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3046791", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 24 17:32:52 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Online social networks are often defined by considering interactions of entities at an aggregate level. For example, a call graph is formed among individuals who have called each other at least once; or at least k times. Similarly, in social-media platforms, we consider implicit social networks among users who have interacted in some way, e.g., have made a conversation, have commented to the content of each other, and so on. Such definitions have been used widely in the literature and they have offered significant insights regarding the structure of social networks. However, it is obvious that they suffer from a severe limitation: They neglect the precise time that interactions among the network entities occur. In this article, we consider interaction networks, where the data description contains not only information about the underlying topology of the social network, but also the exact time instances that network entities interact. In an interaction network, an edge is associated with a timestamp, and multiple edges may occur for the same pair of entities. Consequently, interaction networks offer a more fine-grained representation, which can be leveraged to reveal otherwise hidden dynamic phenomena. 
In the setting of interaction networks, we study the problem of discovering dynamic dense subgraphs whose edges occur in short time intervals. We view such subgraphs as fingerprints of dynamic activity occurring within network communities. Such communities represent groups of individuals who interact with each other in specific time instances, for example, a group of employees who work on a project and whose interaction intensifies before certain project milestones. We prove that the problem we define is NP-hard, and we provide efficient algorithms by adapting techniques for finding dense subgraphs. We also show how to speed-up the proposed methods by exploiting concavity properties of our objective function and by the means of fractional programming. We perform extensive evaluation of the proposed methods on synthetic and real datasets, which demonstrates the validity of our approach and shows that our algorithms can be used to obtain high-quality results.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "27", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2017:MBM, author = "Guannan Liu and Yanjie Fu and Guoqing Chen and Hui Xiong and Can Chen", title = "Modeling Buying Motives for Personalized Product Bundle Recommendation", journal = j-TKDD, volume = "11", number = "3", pages = "28:1--28:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3022185", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 24 17:32:52 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Product bundling is a marketing strategy that offers several products/items for sale as one bundle.
While the bundling strategy has been widely used, less efforts have been made to understand how items should be bundled with respect to consumers' preferences and buying motives for product bundles. This article investigates the relationships between the items that are bought together within a product bundle. To that end, each purchased product bundle is formulated as a bundle graph with items as nodes and the associations between pairs of items in the bundle as edges. The relationships between items can be analyzed by the formation of edges in bundle graphs, which can be attributed to the associations of feature aspects. Then, a probabilistic model BPM (Bundle Purchases with Motives) is proposed to capture the composition of each bundle graph, with two latent factors node-type and edge-type introduced to describe the feature aspects and relationships respectively. Furthermore, based on the preferences inferred from the model, an approach for recommending items to form product bundles is developed by estimating the probability that a consumer would buy an associative item together with the item already bought in the shopping cart. Finally, experimental results on real-world transaction data collected from well-known shopping sites show the effectiveness advantages of the proposed approach over other baseline methods. Moreover, the experiments also show that the proposed model can explain consumers' buying motives for product bundles in terms of different node-types and edge-types.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "28", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Guo:2017:CSN, author = "Ting Guo and Jia Wu and Xingquan Zhu and Chengqi Zhang", title = "Combining Structured Node Content and Topology Information for Networked Graph Clustering", journal = j-TKDD, volume = "11", number = "3", pages = "29:1--29:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2996197", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 24 17:32:52 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Graphs are popularly used to represent objects with shared dependency relationships. To date, all existing graph clustering algorithms consider each node as a single attribute or a set of independent attributes, without realizing that content inside each node may also have complex structures. In this article, we formulate a new networked graph clustering task where a network contains a set of inter-connected (or networked) super-nodes, each of which is a single-attribute graph. The new super-node representation is applicable to many real-world applications, such as a citation network where each node denotes a paper whose content can be described as a graph, and citation relationships between papers form a networked graph (i.e., a super-graph). Networked graph clustering aims to find similar node groups, each of which contains nodes with similar content and structure information. The main challenge is to properly calculate the similarity between super-nodes for clustering. To solve the problem, we propose to characterize node similarity by integrating structure and content information of each super-node. To measure node content similarity, we use cosine distance by considering overlapped attributes between two super-nodes. 
To measure structure similarity, we propose an Attributed Random Walk Kernel (ARWK) to calculate the similarity between super-nodes. Detailed node content analysis is also included to build relationships between super-nodes with shared internal structure information, so the structure similarity can be calculated in a precise way. By integrating the structure similarity and content similarity as one matrix, the spectral clustering is used to achieve networked graph clustering. Our method enjoys sound theoretical properties, including bounded similarities and better structure similarity assessment than traditional graph clustering methods. Experiments on real-world applications demonstrate that our method significantly outperforms baseline approaches.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "29", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2017:IPV, author = "Qi Liu and Biao Xiang and Nicholas Jing Yuan and Enhong Chen and Hui Xiong and Yi Zheng and Yu Yang", title = "An Influence Propagation View of {PageRank}", journal = j-TKDD, volume = "11", number = "3", pages = "30:1--30:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3046941", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 24 17:32:52 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/pagerank.bib; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "For a long time, PageRank has been widely used for authority computation and has been adopted as a solid baseline for evaluating social influence related applications. However, when measuring the authority of network nodes, the traditional PageRank method does not take the nodes' prior knowledge into consideration. 
Also, the connection between PageRank and social influence modeling methods is not clearly established. To that end, this article provides a focused study on understanding PageRank as well as the relationship between PageRank and social influence analysis. Along this line, we first propose a linear social influence model and reveal that this model generalizes the PageRank-based authority computation by introducing some constraints. Then, we show that the authority computation by PageRank can be enhanced if exploiting more reasonable constraints (e.g., from prior knowledge). Next, to deal with the computational challenge of linear model with general constraints, we provide an upper bound for identifying nodes with top authorities. Moreover, we extend the proposed linear model for better measuring the authority of the given node sets, and we also demonstrate the way to quickly identify the top authoritative node sets. Finally, extensive experimental evaluations on four real-world networks validate the effectiveness of the proposed linear model with respect to different constraint settings. The results show that the methods with more reasonable constraints can lead to better ranking and recommendation performance. Meanwhile, the upper bounds formed by PageRank values could be used to quickly locate the nodes and node sets with the highest authorities.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "30", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2017:LMD, author = "Sen Wang and Xue Li and Xiaojun Chang and Lina Yao and Quan Z. 
Sheng and Guodong Long", title = "Learning Multiple Diagnosis Codes for {ICU} Patients with Local Disease Correlation Mining", journal = j-TKDD, volume = "11", number = "3", pages = "31:1--31:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3003729", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 24 17:32:52 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "In the era of big data, a mechanism that can automatically annotate disease codes to patients' records in the medical information system is in demand. The purpose of this work is to propose a framework that automatically annotates the disease labels of multi-source patient data in Intensive Care Units (ICUs). We extract features from two main sources, medical charts and notes. The Bag-of-Words model is used to encode the features. Unlike most of the existing multi-label learning algorithms that globally consider correlations between diseases, our model learns disease correlation locally in the patient data. To achieve this, we derive a local disease correlation representation to enrich the discriminant power of each patient data. This representation is embedded into a unified multi-label learning framework. We develop an alternating algorithm to iteratively optimize the objective function. Extensive experiments have been conducted on a real-world ICU database. We have compared our algorithm with representative multi-label learning algorithms. Evaluation results have shown that our proposed method has state-of-the-art performance in the annotation of multiple diagnostic codes for ICU patients. This study suggests that problems in the automated diagnosis code annotation can be reliably addressed by using a multi-label learning model that exploits disease correlation. 
The findings of this study will greatly benefit health care and management in ICU considering that the automated diagnosis code annotation can significantly improve the quality and management of health care for both patients and caregivers.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "31", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Bae:2017:SEF, author = "Seung-Hee Bae and Daniel Halperin and Jevin D. West and Martin Rosvall and Bill Howe", title = "Scalable and Efficient Flow-Based Community Detection for Large-Scale Graph Analysis", journal = j-TKDD, volume = "11", number = "3", pages = "32:1--32:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2992785", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 24 17:32:52 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Community detection is an increasingly popular approach to uncover important structures in large networks. Flow-based community detection methods rely on communication patterns of the network rather than structural properties to determine communities. The Infomap algorithm in particular optimizes a novel objective function called the map equation and has been shown to outperform other approaches in third-party benchmarks. However, Infomap and its variants are inherently sequential, limiting their use for large-scale graphs. In this article, we propose a novel algorithm to optimize the map equation called RelaxMap. 
RelaxMap provides two important improvements over Infomap: parallelization, so that the map equation can be optimized over much larger graphs, and prioritization, so that the most important work occurs first, iterations take less time, and the algorithm converges faster. We implement these techniques using OpenMP on shared-memory multicore systems, and evaluate our approach on a variety of graphs from standard graph clustering benchmarks as well as real graph datasets. Our evaluation shows that both techniques are effective: RelaxMap achieves 70\% parallel efficiency on eight cores, and prioritization improves algorithm performance by an additional 20--50\% on average, depending on the graph properties. Additionally, RelaxMap converges in the similar number of iterations and provides solutions of equivalent quality as the serial Infomap implementation.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "32", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Peng:2017:RGR, author = "Chong Peng and Zhao Kang and Yunhong Hu and Jie Cheng and Qiang Cheng", title = "Robust Graph Regularized Nonnegative Matrix Factorization for Clustering", journal = j-TKDD, volume = "11", number = "3", pages = "33:1--33:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3003730", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 24 17:32:52 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Matrix factorization is often used for data representation in many data mining and machine-learning problems. In particular, for a dataset without any negative entries, nonnegative matrix factorization (NMF) is often used to find a low-rank approximation by the product of two nonnegative matrices. 
With reduced dimensions, these matrices can be effectively used for many applications such as clustering. The existing methods of NMF are often afflicted with their sensitivity to outliers and noise in the data. To mitigate this drawback, in this paper, we consider integrating NMF into a robust principal component model, and design a robust formulation that effectively captures noise and outliers in the approximation while incorporating essential nonlinear structures. A set of comprehensive empirical evaluations in clustering applications demonstrates that the proposed method has strong robustness to gross errors and superior performance to current state-of-the-art methods.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "33", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Tang:2017:PSS, author = "Xun Tang and Maha Alabduljalil and Xin Jin and Tao Yang", title = "Partitioned Similarity Search with Cache-Conscious Data Traversal", journal = j-TKDD, volume = "11", number = "3", pages = "34:1--34:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3014060", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 24 17:32:52 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "All pairs similarity search (APSS) is used in many web search and data mining applications. Previous work has used techniques such as comparison filtering, inverted indexing, and parallel accumulation of partial results. However, shuffling intermediate results can incur significant communication overhead as data scales up. This paper studies a scalable two-phase approach called Partition-based Similarity Search (PSS). The first phase is to partition the data and group vectors that are potentially similar. 
The second phase is to run a set of tasks where each task compares a partition of vectors with other candidate partitions. Due to data sparsity and the presence of memory hierarchy, accessing feature vectors during the partition comparison phase incurs significant overhead. This paper introduces a cache-conscious design for data layout and traversal to reduce access time through size-controlled data splitting and vector coalescing, and it provides an analysis to guide the choice of optimization parameters. The evaluation results show that for the tested datasets, the proposed approach can lead to an early elimination of unnecessary I/O and data communication while sustaining parallel efficiency with one order of magnitude of performance improvement and it can also be integrated with LSH for approximated APSS.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "34", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Feng:2017:RBC, author = "Shanshan Feng and Jian Cao and Jie Wang and Shiyou Qian", title = "Recommendations Based on Comprehensively Exploiting the Latent Factors Hidden in Items' Ratings and Content", journal = j-TKDD, volume = "11", number = "3", pages = "35:1--35:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3003728", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 24 17:32:52 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "To improve the performance of recommender systems in a practical manner, several hybrid approaches have been developed by considering item ratings and content information simultaneously. 
However, most of these hybrid approaches make recommendations based on aggregating different recommendation techniques using various strategies, rather than considering joint modeling of the item's ratings and content, and thus fail to detect many latent factors that could potentially improve the performance of the recommender systems. For this reason, these approaches continue to suffer from data sparsity and do not work well for recommending items to individual users. A few studies try to describe a user's preference by detecting items' latent features from content-description texts as compensation for the sparse ratings. Unfortunately, most of these methods are still generally unable to accomplish recommendation tasks well for two reasons: (1) they learn latent factors from text descriptions or user--item ratings independently, rather than combining them together; and (2) influences of latent factors hidden in texts and ratings are not fully explored. In this study, we propose a probabilistic approach that we denote as latent random walk (LRW) based on the combination of an integrated latent topic model and random walk (RW) with the restart method, which can be used to rank items according to expected user preferences by detecting both their explicit and implicit correlative information, in order to recommend top-ranked items to potentially interested users. As presented in this article, the goal of this work is to comprehensively discover latent factors hidden in items' ratings and content in order to alleviate the data sparsity problem and to improve the performance of recommender systems. The proposed topic model provides a generative probabilistic framework that discovers users' implicit preferences and items' latent features simultaneously by exploiting both ratings and item content information. On the basis of this probabilistic framework, RW can predict a user's preference for unrated items by discovering global latent relations. 
In order to show the efficiency of the proposed approach, we test LRW and other state-of-the-art methods on three real-world datasets, namely, CAMRa2011, Yahoo!, and APP. The experiments indicate that our approach outperforms all comparative methods and, in addition, that it is less sensitive to the data sparsity problem, thus demonstrating the robustness of LRW for recommendation tasks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "35", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2017:SPM, author = "Xutong Liu and Feng Chen and Yen-Cheng Lu and Chang-Tien Lu", title = "Spatial Prediction for Multivariate Non-{Gaussian} Data", journal = j-TKDD, volume = "11", number = "3", pages = "36:1--36:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3022669", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 24 17:32:52 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "With the ever increasing volume of geo-referenced datasets, there is a real need for better statistical estimation and prediction techniques for spatial analysis. Most existing approaches focus on predicting multivariate Gaussian spatial processes, but as the data may consist of non-Gaussian (or mixed type) variables, this creates two challenges: (1) how to accurately capture the dependencies among different data types, both Gaussian and non-Gaussian; and (2) how to efficiently predict multivariate non-Gaussian spatial processes. In this article, we propose a generic approach for predicting multiple response variables of mixed types. 
The proposed approach accurately captures cross-spatial dependencies among response variables and reduces the computational burden by projecting the spatial process to a lower dimensional space with knot-based techniques. Efficient approximations are provided to estimate posterior marginals of latent variables for the predictive process, and extensive experimental evaluations based on both simulation and real-life datasets are provided to demonstrate the effectiveness and efficiency of this new approach.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "36", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2017:MDP, author = "Liang Wang and Zhiwen Yu and Bin Guo and Tao Ku and Fei Yi", title = "Moving Destination Prediction Using Sparse Dataset: a Mobility Gradient Descent Approach", journal = j-TKDD, volume = "11", number = "3", pages = "37:1--37:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3051128", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 24 17:32:52 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Moving destination prediction offers an important category of location-based applications and provides essential intelligence to business and governments. In existing studies, a common approach to destination prediction is to match the given query trajectory with massive recorded trajectories by similarity calculation. Unfortunately, due to privacy concerns, budget constraints, and many other factors, in most circumstances, we can only obtain a sparse trajectory dataset. In sparse dataset, the available moving trajectories are far from enough to cover all possible query trajectories; thus the predictability of the matching-based approach will decrease remarkably. 
Toward destination prediction with sparse dataset, instead of searching similar trajectories over the sparse records, we alternatively examine the changes of distances from sampling locations to final destination on query trajectory. The underlying idea is intuitive: It is directly motivated by travel purpose, people always get closer to the final destination during the movement. By borrowing the conception of gradient descent in optimization theory, we propose a novel moving destination prediction approach, namely MGDPre. Building upon the mobility gradient descent, MGDPre only investigates the behavior characteristics of query trajectory itself without matching historical trajectories, and thus is applicable for sparse dataset. We evaluate our approach based on extensive experiments, using GPS trajectories generated by a sample of taxis over a 10-day period in Shenzhen city, China. The results demonstrate that the effectiveness, efficiency, and scalability of our approach outperform state-of-the-art baseline methods.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "37", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Fountoulakis:2017:RRA, author = "Kimon Fountoulakis and Abhisek Kundu and Eugenia-Maria Kontopoulou and Petros Drineas", title = "A Randomized Rounding Algorithm for Sparse {PCA}", journal = j-TKDD, volume = "11", number = "3", pages = "38:1--38:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3046948", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 24 17:32:52 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "We present and analyze a simple, two-step algorithm to approximate the optimal solution of the sparse PCA problem. 
In the proposed approach, we first solve an $ l_1$-penalized version of the NP-hard sparse PCA optimization problem and then we use a randomized rounding strategy to sparsify the resulting dense solution. Our main theoretical result guarantees an additive error approximation and provides a tradeoff between sparsity and accuracy. Extensive experimental evaluation indicates that the proposed approach is competitive in practice, even compared to state-of-the-art toolboxes such as Spasm.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "38", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Aggarwal:2017:ISI, author = "Charu C. Aggarwal", title = "Introduction to Special Issue on the Best Papers from {KDD 2016}", journal = j-TKDD, volume = "11", number = "4", pages = "39:1--39:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3092689", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 22 09:23:44 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "This issue contains the best papers from the ACM KDD Conference 2016. As is customary at KDD, special issue papers are invited only from the research track. The top-ranked papers from the KDD 2016 conference are included in this issue. This issue contains a total of six articles, which are from different areas of data mining. A brief description of these articles is also provided in this article.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "39", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Cheng:2017:RCA, author = "Wei Cheng and Jingchao Ni and Kai Zhang and Haifeng Chen and Guofei Jiang and Yu Shi and Xiang Zhang and Wei Wang", title = "Ranking Causal Anomalies for System Fault Diagnosis via Temporal and Dynamical Analysis on Vanishing Correlations", journal = j-TKDD, volume = "11", number = "4", pages = "40:1--40:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3046946", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 22 09:23:44 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Detecting system anomalies is an important problem in many fields such as security, fault management, and industrial optimization. Recently, invariant network has shown to be powerful in characterizing complex system behaviours. In the invariant network, a node represents a system component and an edge indicates a stable, significant interaction between two components. Structures and evolutions of the invariance network, in particular the vanishing correlations, can shed important light on locating causal anomalies and performing diagnosis. However, existing approaches to detect causal anomalies with the invariant network often use the percentage of vanishing correlations to rank possible causal components, which have several limitations: (1) fault propagation in the network is ignored, (2) the root causal anomalies may not always be the nodes with a high percentage of vanishing correlations, (3) temporal patterns of vanishing correlations are not exploited for robust detection, and (4) prior knowledge on anomalous nodes are not exploited for (semi-)supervised detection. 
To address these limitations, in this article we propose a network diffusion based framework to identify significant causal anomalies and rank them. Our approach can effectively model fault propagation over the entire invariant network and can perform joint inference on both the structural and the time-evolving broken invariance patterns. As a result, it can locate high-confidence anomalies that are truly responsible for the vanishing correlations and can compensate for unstructured measurement noise in the system. Moreover, when the prior knowledge on the anomalous status of some nodes are available at certain time points, our approach is able to leverage them to further enhance the anomaly inference accuracy. When the prior knowledge is noisy, our approach also automatically learns reliable information and reduces impacts from noises. By performing extensive experiments on synthetic datasets, bank information system datasets, and coal plant cyber-physical system datasets, we demonstrate the effectiveness of our approach.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "40", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2017:CDM, author = "Tianyang Zhang and Peng Cui and Christos Faloutsos and Yunfei Lu and Hao Ye and Wenwu Zhu and Shiqiang Yang", title = "{comeNgo}: a Dynamic Model for Social Group Evolution", journal = j-TKDD, volume = "11", number = "4", pages = "41:1--41:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3059214", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 22 09:23:44 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "How do social groups, such as Facebook groups and Wechat groups, dynamically evolve over time? How do people join the social groups, uniformly or with burst? 
What is the pattern of people quitting from groups? Is there a simple universal model to depict the come-and-go patterns of various groups? In this article, we examine temporal evolution patterns of more than 100 thousands social groups with more than 10 million users. We surprisingly find that the evolution patterns of real social groups goes far beyond the classic dynamic models like SI and SIR. For example, we observe both diffusion and non-diffusion mechanism in the group joining process, and power-law decay in group quitting process, rather than exponential decay as expected in SIR model. Therefore, we propose a new model comeNgo, a concise yet flexible dynamic model for group evolution. Our model has the following advantages: (a) Unification power: it generalizes earlier theoretical models and different joining and quitting mechanisms we find from observation. (b) Succinctness and interpretability: it contains only six parameters with clear physical meanings. (c) Accuracy: it can capture various kinds of group evolution patterns precisely, and the goodness of fit increases by 58\% over baseline. (d) Usefulness: it can be used in multiple application scenarios, such as forecasting and pattern discovery. Furthermore, our model can provide insights about different evolution patterns of social groups, and we also find that group structure and its evolution has notable relations with temporal patterns of group evolution.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "41", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chen:2017:CDI, author = "Chen Chen and Hanghang Tong and Lei Xie and Lei Ying and Qing He", title = "Cross-Dependency Inference in Multi-Layered Networks: a Collaborative Filtering Perspective", journal = j-TKDD, volume = "11", number = "4", pages = "42:1--42:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3056562", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 22 09:23:44 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "The increasingly connected world has catalyzed the fusion of networks from different domains, which facilitates the emergence of a new network model --- multi-layered networks. Examples of such kind of network systems include critical infrastructure networks, biological systems, organization-level collaborations, cross-platform e-commerce, and so forth. One crucial structure that distances multi-layered network from other network models is its cross-layer dependency, which describes the associations between the nodes from different layers. Needless to say, the cross-layer dependency in the network plays an essential role in many data mining applications like system robustness analysis and complex network control. However, it remains a daunting task to know the exact dependency relationships due to noise, limited accessibility, and so forth. In this article, we tackle the cross-layer dependency inference problem by modeling it as a collective collaborative filtering problem. Based on this idea, we propose an effective algorithm Fascinate that can reveal unobserved dependencies with linear complexity. Moreover, we derive Fascinate-ZERO, an online variant of Fascinate that can respond to a newly added node timely by checking its neighborhood dependencies. 
We perform extensive evaluations on real datasets to substantiate the superiority of our proposed approaches.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "42", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{DeStefani:2017:TCL, author = "Lorenzo {De Stefani} and Alessandro Epasto and Matteo Riondato and Eli Upfal", title = "{TRI{\`E}ST}: Counting Local and Global Triangles in Fully Dynamic Streams with Fixed Memory Size", journal = j-TKDD, volume = "11", number = "4", pages = "43:1--43:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3059194", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 22 09:23:44 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "``Ogni lassada xe persa.''$^1$ --- Proverb from Trieste, Italy. We present tri{\`e}st, a suite of one-pass streaming algorithms to compute unbiased, low-variance, high-quality approximations of the global and local (i.e., incident to each vertex) number of triangles in a fully dynamic graph represented as an adversarial stream of edge insertions and deletions. Our algorithms use reservoir sampling and its variants to exploit the user-specified memory space at all times. This is in contrast with previous approaches, which require hard-to-choose parameters (e.g., a fixed sampling probability) and offer no guarantees on the amount of memory they use. We analyze the variance of the estimations and show novel concentration bounds for these quantities. Our experimental results on very large graphs demonstrate that tri{\`e}st outperforms state-of-the-art approaches in accuracy and exhibits a small update time.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "43", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Hooi:2017:GBF, author = "Bryan Hooi and Kijung Shin and Hyun Ah Song and Alex Beutel and Neil Shah and Christos Faloutsos", title = "Graph-Based Fraud Detection in the Face of Camouflage", journal = j-TKDD, volume = "11", number = "4", pages = "44:1--44:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3056563", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 22 09:23:44 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Given a bipartite graph of users and the products that they review, or followers and followees, how can we detect fake reviews or follows? Existing fraud detection methods (spectral, etc.) try to identify dense subgraphs of nodes that are sparsely connected to the remaining graph. Fraudsters can evade these methods using camouflage, by adding reviews or follows with honest targets so that they look ``normal.'' Even worse, some fraudsters use hijacked accounts from honest users, and then the camouflage is indeed organic. Our focus is to spot fraudsters in the presence of camouflage or hijacked accounts. We propose FRAUDAR, an algorithm that (a) is camouflage resistant, (b) provides upper bounds on the effectiveness of fraudsters, and (c) is effective in real-world data. Experimental results under various attacks show that FRAUDAR outperforms the top competitor in accuracy of detecting both camouflaged and non-camouflaged fraud. Additionally, in real-world experiments with a Twitter follower--followee graph of 1.47 billion edges, FRAUDAR successfully detected a subgraph of more than 4,000 detected accounts, of which a majority had tweets showing that they used follower-buying services.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "44", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Anderson:2017:AHE, author = "Ashton Anderson and Jon Kleinberg and Sendhil Mullainathan", title = "Assessing Human Error Against a Benchmark of Perfection", journal = j-TKDD, volume = "11", number = "4", pages = "45:1--45:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3046947", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 22 09:23:44 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "An increasing number of domains are providing us with detailed trace data on human decisions in settings where we can evaluate the quality of these decisions via an algorithm. Motivated by this development, an emerging line of work has begun to consider whether we can characterize and predict the kinds of decisions where people are likely to make errors. To investigate what a general framework for human error prediction might look like, we focus on a model system with a rich history in the behavioral sciences: the decisions made by chess players as they select moves in a game. We carry out our analysis at a large scale, employing datasets with several million recorded games, and using chess tablebases to acquire a form of ground truth for a subset of chess positions that have been completely solved by computers but remain challenging for even the best players in the world. We organize our analysis around three categories of features that we argue are present in most settings where the analysis of human error is applicable: the skill of the decision-maker, the time available to make the decision, and the inherent difficulty of the decision. 
We identify rich structure in all three of these categories of features, and find strong evidence that in our domain, features describing the inherent difficulty of an instance are significantly more powerful than features based on skill or time.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "45", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2017:DCM, author = "Yihan Wang and Shaoxu Song and Lei Chen and Jeffrey Xu Yu and Hong Cheng", title = "Discovering Conditional Matching Rules", journal = j-TKDD, volume = "11", number = "4", pages = "46:1--46:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3070647", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 22 09:23:44 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Matching dependencies (MDs) have recently been proposed to make data dependencies tolerant to various information representations, and found useful in data quality applications such as record matching. Instead of the strict equality function used in traditional dependency syntax (e.g., functional dependencies), MDs specify constraints based on similarity and identification. However, in practice, MDs may still be too strict and applicable only in a subset of tuples in a relation. Thereby, we study the conditional matching dependencies (CMDs), which bind matching dependencies only in a certain part of a table, i.e., MDs conditionally applicable in a subset of tuples. Compared to MDs, CMDs have more expressive power that enables them to satisfy wider application needs. 
In this article, we study several important theoretical and practical issues of CMDs, including irreducible CMDs with respect to the implication, discovery of CMDs from data, reliable CMDs agreed most by a relation, approximate CMDs almost satisfied in a relation, and finally applications of CMDs in record matching and missing value repairing. Through an extensive experimental evaluation in real data sets, we demonstrate the efficiency of proposed CMDs discovery algorithms and effectiveness of CMDs in real applications.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "46", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Anagnostopoulos:2017:QDL, author = "Christos Anagnostopoulos and Peter Triantafillou", title = "Query-Driven Learning for Predictive Analytics of Data Subspace Cardinality", journal = j-TKDD, volume = "11", number = "4", pages = "47:1--47:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3059177", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 22 09:23:44 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Fundamental to many predictive analytics tasks is the ability to estimate the cardinality (number of data items) of multi-dimensional data subspaces, defined by query selections over datasets. This is crucial for data analysts dealing with, e.g., interactive data subspace explorations, data subspace visualizations, and in query processing optimization. However, in many modern data systems, predictive analytics may be (i) too costly money-wise, e.g., in clouds, (ii) unreliable, e.g., in modern Big Data query engines, where accurate statistics are difficult to obtain/maintain, or (iii) infeasible, e.g., for privacy issues. 
We contribute a novel, query-driven, function estimation model of analyst-defined data subspace cardinality. The proposed estimation model is highly accurate in terms of prediction and accommodating the well-known selection queries: multi-dimensional range and distance-nearest neighbors (radius) queries. Our function estimation model: (i) quantizes the vectorial query space, by learning the analysts' access patterns over a data space, (ii) associates query vectors with their corresponding cardinalities of the analyst-defined data subspaces, (iii) abstracts and employs query vectorial similarity to predict the cardinality of an unseen/unexplored data subspace, and (iv) identifies and adapts to possible changes of the query subspaces based on the theory of optimal stopping. The proposed model is decentralized, facilitating the scaling-out of such predictive analytics queries. The research significance of the model lies in that (i) it is an attractive solution when data-driven statistical techniques are undesirable or infeasible, (ii) it offers a scale-out, decentralized training solution, (iii) it is applicable to different selection query types, and (iv) it offers a performance that is superior to that of data-driven approaches.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "47", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wu:2017:LSO, author = "Yue Wu and Steven C. H. 
Hoi and Tao Mei and Nenghai Yu", title = "Large-Scale Online Feature Selection for Ultra-High Dimensional Sparse Data", journal = j-TKDD, volume = "11", number = "4", pages = "48:1--48:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3070646", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 22 09:23:44 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Feature selection (FS) is an important technique in machine learning and data mining, especially for large-scale high-dimensional data. Most existing studies have been restricted to batch learning, which is often inefficient and poorly scalable when handling big data in real world. As real data may arrive sequentially and continuously, batch learning has to retrain the model for the new coming data, which is very computationally intensive. Online feature selection (OFS) is a promising new paradigm that is more efficient and scalable than batch learning algorithms. However, existing online algorithms usually fall short in their inferior efficacy. In this article, we present a novel second-order OFS algorithm that is simple yet effective, very fast and extremely scalable to deal with large-scale ultra-high dimensional sparse data streams. The basic idea is to exploit the second-order information to choose the subset of important features with high confidence weights. Unlike existing OFS methods that often suffer from extra high computational cost, we devise a novel algorithm with a MaxHeap-based approach, which is not only more effective than the existing first-order algorithms, but also significantly more efficient and scalable. Our extensive experiments validated that the proposed technique achieves highly competitive accuracy as compared with state-of-the-art batch FS methods, meanwhile it consumes significantly less computational cost that is orders of magnitude lower. 
Impressively, on a billion-scale synthetic dataset (1-billion dimensions, 1-billion non-zero features, and 1-million samples), the proposed algorithm takes less than 3 minutes to run on a single PC.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "48", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Costa:2017:MTA, author = "Alceu Ferraz Costa and Yuto Yamaguchi and Agma Juci Machado Traina and Caetano {Traina Jr.} and Christos Faloutsos", title = "Modeling Temporal Activity to Detect Anomalous Behavior in Social Media", journal = j-TKDD, volume = "11", number = "4", pages = "49:1--49:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3064884", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 22 09:23:44 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Social media has become a popular and important tool for human communication. However, due to this popularity, spam and the distribution of malicious content by computer-controlled users, known as bots, has become a widespread problem. At the same time, when users use social media, they generate valuable data that can be used to understand the patterns of human communication. In this article, we focus on the following important question: Can we identify and use patterns of human communication to decide whether a human or a bot controls a user? The first contribution of this article is showing that the distribution of inter-arrival times (IATs) between postings is characterized by following four patterns: (i) heavy-tails, (ii) periodic-spikes, (iii) correlation between consecutive values, and (iv) bimodality. As our second contribution, we propose a mathematical model named Act-M (Activity Model). We show that Act-M can accurately fit the distribution of IATs from social media users. 
Finally, we use Act-M to develop a method that detects if users are bots based only on the timing of their postings. We validate Act-M using data from over 55 million postings from four social media services: Reddit, Twitter, Stack-Overflow, and Hacker-News. Our experiments show that Act-M provides a more accurate fit to the data than existing models for human dynamics. Additionally, when detecting bots, Act-M provided a precision higher than 93\% and 77\% with a sensitivity of 70\% for the Twitter and Reddit datasets, respectively.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "49", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Vosoughi:2017:RGP, author = "Soroush Vosoughi and Mostafa `Neo' Mohsenvand and Deb Roy", title = "Rumor Gauge: Predicting the Veracity of Rumors on {Twitter}", journal = j-TKDD, volume = "11", number = "4", pages = "50:1--50:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3070644", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 22 09:23:44 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "The spread of malicious or accidental misinformation in social media, especially in time-sensitive situations, such as real-world emergencies, can have harmful effects on individuals and society. In this work, we developed models for automated verification of rumors (unverified information) that propagate through Twitter. To predict the veracity of rumors, we identified salient features of rumors by examining three aspects of information spread: linguistic style used to express rumors, characteristics of people involved in propagating information, and network propagation dynamics. The predicted veracity of a time series of these features extracted from a rumor (a collection of tweets) is generated using Hidden Markov Models. 
The verification algorithm was trained and tested on 209 rumors representing 938,806 tweets collected from real-world events, including the 2013 Boston Marathon bombings, the 2014 Ferguson unrest, and the 2014 Ebola epidemic, and many other rumors about various real-world events reported on popular websites that document public rumors. The algorithm was able to correctly predict the veracity of 75\% of the rumors faster than any other public source, including journalists and law enforcement officials. The ability to track rumors and predict their outcomes may have practical applications for news consumers, financial markets, journalists, and emergency services, and more generally to help minimize the impact of false information on Twitter.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "50", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Boutemine:2017:MCS, author = "Oualid Boutemine and Mohamed Bouguessa", title = "Mining Community Structures in Multidimensional Networks", journal = j-TKDD, volume = "11", number = "4", pages = "51:1--51:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3080574", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 22 09:23:44 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "We investigate the problem of community detection in multidimensional networks, that is, networks where entities engage in various interaction types (dimensions) simultaneously. While some approaches have been proposed to identify community structures in multidimensional networks, there are a number of problems still to solve. 
In fact, the majority of the proposed approaches suffer from one or even more of the following limitations: (1) difficulty detecting communities in networks characterized by the presence of many irrelevant dimensions, (2) lack of systematic procedures to explicitly identify the relevant dimensions of each community, and (3) dependence on a set of user-supplied parameters, including the number of communities, that require a proper tuning. Most of the existing approaches are inadequate for dealing with these three issues in a unified framework. In this paper, we develop a novel approach that is capable of addressing the aforementioned limitations in a single framework. The proposed approach allows automated identification of communities and their sub-dimensional spaces using a novel objective function and a constrained label propagation-based optimization strategy. By leveraging the relevance of dimensions at the node level, the strategy aims to maximize the number of relevant within-community links while keeping track of the most relevant dimensions. A notable feature of the proposed approach is that it is able to automatically identify low dimensional community structures embedded in a high dimensional space. Experiments on synthetic and real multidimensional networks illustrate the suitability of the new method.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "51", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Algizawy:2017:RTL, author = "Essam Algizawy and Tetsuji Ogawa and Ahmed El-Mahdy", title = "Real-Time Large-Scale Map Matching Using Mobile Phone Data", journal = j-TKDD, volume = "11", number = "4", pages = "52:1--52:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3046945", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 22 09:23:44 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "With the wide spread use of mobile phones, cellular mobile big data is becoming an important resource that provides a wealth of information with almost no cost. However, the data generally suffers from relatively high spatial granularity, limiting the scope of its application. In this article, we consider, for the first time, the utility of actual mobile big data for map matching allowing for ``microscopic'' level traffic analysis. The state-of-the-art in map matching generally targets GPS data, which provides far denser sampling and higher location resolution than the mobile data. Our approach extends the typical Hidden-Markov model used in map matching to accommodate for highly sparse location trajectories, exploit the large mobile data volume to learn the model parameters, and exploit the sparsity of the data to provide for real-time Viterbi processing. We study an actual, anonymised mobile trajectories data set of the city of Dakar, Senegal, spanning a year, and generate a corresponding road-level traffic density, at an hourly granularity, for each mobile trajectory. 
We observed a relatively high correlation between the generated traffic intensities and corresponding values obtained by the gravity and equilibrium models typically used in mobility analysis, indicating the utility of the approach as an alternative means for traffic analysis.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "52", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{vanLeeuwen:2018:ETS, author = "Matthijs van Leeuwen and Polo Chau and Jilles Vreeken and Dafna Shahaf and Christos Faloutsos", title = "Editorial: {TKDD} Special Issue on Interactive Data Exploration and Analytics", journal = j-TKDD, volume = "12", number = "1", pages = "1:1--1:??", month = feb, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3181707", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:45 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "1", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Rayar:2018:VIS, author = "Fr{\'e}d{\'e}ric Rayar and Sabine Barrat and Fatma Bouali and Gilles Venturini", title = "A Viewable Indexing Structure for the Interactive Exploration of Dynamic and Large Image Collections", journal = j-TKDD, volume = "12", number = "1", pages = "2:1--2:??", month = feb, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3047011", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:45 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Thanks to the capturing devices cost reduction and the advent of social networks, the size of image collections is becoming extremely huge. 
Many works in the literature have addressed the indexing of large image collections for search purposes. However, there is a lack of support for exploratory data mining. One may want to wander around the images and experience serendipity in the exploration process. Thus, effective paradigms not only for organising, but also visualising these image collections become necessary. In this article, we present a study to jointly index and visualise large image collections. The work focuses on satisfying three constraints. First, large image collections, up to million of images, shall be handled. Second, dynamic collections, such as ever-growing collections, shall be processed in an incremental way, without reprocessing the whole collection at each modification. Finally, an intuitive and interactive exploration system shall be provided to the user to allow him to easily mine image collections. To this end, a data partitioning algorithm has been modified and proximity graphs have been used to fit the visualisation purpose. A custom web platform has been implemented to visualise the hierarchical and graph-based hybrid structure. The results of a user evaluation we have conducted show that the exploration of the collections is intuitive and smooth thanks to the proposed structure. Furthermore, the scalability of the proposed indexing method is proved using large public image collections.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "2", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Makki:2018:AVV, author = "Raheleh Makki and Eder Carvalho and Axel J. 
Soto and Stephen Brooks and Maria Cristina {Ferreira De Oliveira} and Evangelos Milios and Rosane Minghim", title = "{ATR-Vis}: Visual and Interactive Information Retrieval for Parliamentary Discussions in {Twitter}", journal = j-TKDD, volume = "12", number = "1", pages = "3:1--3:??", month = feb, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3047010", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:45 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "The worldwide adoption of Twitter turned it into one of the most popular platforms for content analysis as it serves as a gauge of the public's feeling and opinion on a variety of topics. This is particularly true of political discussions and lawmakers' actions and initiatives. Yet, one common but unrealistic assumption is that the data of interest for analysis is readily available in a comprehensive and accurate form. Data need to be retrieved, but due to the brevity and noisy nature of Twitter content, it is difficult to formulate user queries that match relevant posts that use different terminology without introducing a considerable volume of unwanted content. This problem is aggravated when the analysis must contemplate multiple and related topics of interest, for which comments are being concurrently posted. This article presents Active Tweet Retrieval Visualization (ATR-Vis), a user-driven visual approach for the retrieval of Twitter content applicable to this scenario. The method proposes a set of active retrieval strategies to involve an analyst in such a way that a major improvement in retrieval coverage and precision is attained with minimal user effort. ATR-Vis enables non-technical users to benefit from the aforementioned active learning strategies by providing visual aids to facilitate the requested supervision. 
This supports the exploration of the space of potentially relevant tweets, and affords a better understanding of the retrieval results. We evaluate our approach in scenarios in which the task is to retrieve tweets related to multiple parliamentary debates within a specific time span. We collected two Twitter datasets, one associated with debates in the Canadian House of Commons during a particular week in May 2014, and another associated with debates in the Brazilian Federal Senate during a selected week in May 2015. The two use cases illustrate the effectiveness of ATR-Vis for the retrieval of relevant tweets, while quantitative results show that our approach achieves high retrieval quality with a modest amount of supervision. Finally, we evaluated our tool with three external users who perform searching in social media as part of their professional work.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "3", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Lim:2018:MEA, author = "Yongsub Lim and Minsoo Jung and U. Kang", title = "Memory-Efficient and Accurate Sampling for Counting Local Triangles in Graph Streams: From Simple to Multigraphs", journal = j-TKDD, volume = "12", number = "1", pages = "4:1--4:??", month = feb, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3022186", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:45 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "How can we estimate local triangle counts accurately in a graph stream without storing the whole graph? How to handle duplicated edges in local triangle counting for graph stream? 
Local triangle counting, which computes the number of triangles attached to each node in a graph, is a very important problem with wide applications in social network analysis, anomaly detection, web mining, and the like. In this article, we propose algorithms for local triangle counting in a graph stream based on edge sampling: Mascot for a simple graph, and MultiBMascot and MultiWMascot for a multigraph. To develop Mascot, we first present two naive local triangle counting algorithms in a graph stream, called Mascot-C and Mascot-A. Mascot-C is based on constant edge sampling, and Mascot-A improves its accuracy by utilizing more memory spaces. Mascot achieves both accuracy and memory-efficiency of the two algorithms by unconditional triangle counting for a new edge, regardless of whether it is sampled or not. Extending the idea to a multigraph, we develop two algorithms MultiBMascot and MultiWMascot. MultiBMascot enables local triangle counting on the corresponding simple graph of a streamed multigraph without explicit graph conversion; MultiWMascot considers repeated occurrences of an edge as its weight and counts each triangle as the product of its three edge weights. In contrast to the existing algorithm that requires prior knowledge on the target graph and appropriately set parameters, our proposed algorithms require only one parameter of edge sampling probability. Through extensive experiments, we show that for the same number of edges sampled, Mascot provides the best accuracy compared to the existing algorithm as well as Mascot-C and Mascot-A. We also demonstrate that MultiBMascot on a multigraph is comparable to Mascot-C on the counterpart simple graph, and MultiWMascot becomes more accurate for higher degree nodes.
Thanks to Mascot, we also discover interesting anomalous patterns in real graphs, including core-peripheries in the web, a bimodal call pattern in a phone call history, and intensive collaboration in DBLP.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "4", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Shi:2018:VAB, author = "Lei Shi and Hanghang Tong and Madelaine Daianu and Feng Tian and Paul M. Thompson", title = "Visual Analysis of Brain Networks Using Sparse Regression Models", journal = j-TKDD, volume = "12", number = "1", pages = "5:1--5:??", month = feb, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3023363", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:45 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Studies of the human brain network are becoming increasingly popular in the fields of neuroscience, computer science, and neurology. Despite this rapidly growing line of research, gaps remain on the intersection of data analytics, interactive visual representation, and the human intelligence---all needed to advance our understanding of human brain networks. This article tackles this challenge by exploring the design space of visual analytics. We propose an integrated framework to orchestrate computational models with comprehensive data visualizations on the human brain network. The framework targets two fundamental tasks: the visual exploration of multi-label brain networks and the visual comparison among brain networks across different subject groups. During the first task, we propose a novel interactive user interface to visualize sets of labeled brain networks; in our second task, we introduce sparse regression models to select discriminative features from the brain network to facilitate the comparison.
Through user studies and quantitative experiments, both methods are shown to greatly improve the visual comparison performance. Finally, real-world case studies with domain experts demonstrate the utility and effectiveness of our framework to analyze reconstructions of human brain connectivity maps. The perceptually optimized visualization design and the feature selection model calibration are shown to be the key to our significant findings.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "5", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Galbrun:2018:MRS, author = "Esther Galbrun and Pauli Miettinen", title = "Mining Redescriptions with {Siren}", journal = j-TKDD, volume = "12", number = "1", pages = "6:1--6:??", month = feb, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3007212", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:45 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "In many areas of science, scientists need to find distinct common characterizations of the same objects and, vice versa, to identify sets of objects that admit multiple shared descriptions. For example, in biology, an important task is to identify the bioclimatic constraints that allow some species to survive, that is, to describe geographical regions both in terms of the fauna that inhabits them and of their bioclimatic conditions. In data analysis, the task of automatically generating such alternative characterizations is called redescription mining. If a domain expert wants to use redescription mining in his research, merely being able to find redescriptions is not enough.
He must also be able to understand the redescriptions found, adjust them to better match his domain knowledge, test alternative hypotheses with them, and guide the mining process toward results he considers interesting. To facilitate these goals, we introduce Siren, an interactive tool for mining and visualizing redescriptions. Siren allows to obtain redescriptions in an anytime fashion through efficient, distributed mining, to examine the results in various linked visualizations, to interact with the results either directly or via the visualizations, and to guide the mining algorithm toward specific redescriptions. In this article, we explain the features of Siren and why they are useful for redescription mining. We also propose two novel redescription mining algorithms that improve the generalizability of the results compared to the existing ones.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "6", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wu:2018:IDC, author = "Hao Wu and Maoyuan Sun and Peng Mi and Nikolaj Tatti and Chris North and Naren Ramakrishnan", title = "Interactive Discovery of Coordinated Relationship Chains with Maximum Entropy Models", journal = j-TKDD, volume = "12", number = "1", pages = "7:1--7:??", month = feb, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3047017", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:45 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Modern visual analytic tools promote human-in-the-loop analysis but are limited in their ability to direct the user toward interesting and promising directions of study. This problem is especially acute when the analysis task is exploratory in nature, e.g., the discovery of potentially coordinated relationships in massive text datasets. 
Such tasks are very common in domains like intelligence analysis and security forensics where the goal is to uncover surprising coalitions bridging multiple types of relations. We introduce new maximum entropy models to discover surprising chains of relationships leveraging count data about entity occurrences in documents. These models are embedded in a visual analytic system called MERCER (Maximum Entropy Relational Chain ExploRer) that treats relationship bundles as first class objects and directs the user toward promising lines of inquiry. We demonstrate how user input can judiciously direct analysis toward valid conclusions, whereas a purely algorithmic approach could be led astray. Experimental results on both synthetic and real datasets from the intelligence community are presented.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "7", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Choo:2018:VVA, author = "Jaegul Choo and Hannah Kim and Edward Clarkson and Zhicheng Liu and Changhyun Lee and Fuxin Li and Hanseung Lee and Ramakrishnan Kannan and Charles D. Stolper and John Stasko and Haesun Park", title = "{VisIRR}: a Visual Analytics System for Information Retrieval and Recommendation for Large-Scale Document Data", journal = j-TKDD, volume = "12", number = "1", pages = "8:1--8:??", month = feb, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3070616", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:45 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "In this article, we present an interactive visual information retrieval and recommendation system, called VisIRR, for large-scale document discovery. 
VisIRR effectively combines the paradigms of (1) a passive pull through query processes for retrieval and (2) an active push that recommends items of potential interest to users based on their preferences. Equipped with an efficient dynamic query interface against a large-scale corpus, VisIRR organizes the retrieved documents into high-level topics and visualizes them in a 2D space, representing the relationships among the topics along with their keyword summary. In addition, based on interactive personalized preference feedback with regard to documents, VisIRR provides document recommendations from the entire corpus, which are beyond the retrieved sets. Such recommended documents are visualized in the same space as the retrieved documents, so that users can seamlessly analyze both existing and newly recommended ones. This article presents novel computational methods, which make these integrated representations and fast interactions possible for a large-scale document corpus. We illustrate how the system works by providing detailed usage scenarios. Additionally, we present preliminary user study results for evaluating the effectiveness of the system.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "8", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Kamat:2018:SBA, author = "Niranjan Kamat and Arnab Nandi", title = "A Session-Based Approach to Fast-But-Approximate Interactive Data Cube Exploration", journal = j-TKDD, volume = "12", number = "1", pages = "9:1--9:??", month = feb, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3070648", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:45 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "With the proliferation of large datasets, sampling has become pervasive in data analysis. 
Sampling has numerous benefits---from reducing the computation time and cost to increasing the scope of interactive analysis. A popular task in data science, well-suited toward sampling, is the computation of fast-but-approximate aggregations over sampled data. Aggregation is a foundational block of data analysis, with data cube being its primary construct. We observe that such aggregation queries are typically issued in an ad-hoc, interactive setting. In contrast to one-off queries, a typical query session consists of a series of quick queries, interspersed with the user inspecting the results and formulating the next query. The similarity between session queries opens up opportunities for reusing computation of not just query results, but also error estimates. Error estimates need to be provided alongside sampled results for the results to be meaningful. We propose Sesame, a rewrite and caching framework that accelerates the entire interactive session of aggregation queries over sampled data. We focus on two unique and computationally expensive aspects of this use case: query speculation in the presence of sampling, and error computation, and provide novel strategies for result and error reuse. We demonstrate that our approach outperforms conventional sampled aggregation techniques by at least an order of magnitude, without modifying the underlying database.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "9", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Senin:2018:GID, author = "Pavel Senin and Jessica Lin and Xing Wang and Tim Oates and Sunil Gandhi and Arnold P.
Boedihardjo and Crystal Chen and Susan Frankenstein", title = "{GrammarViz} 3.0: Interactive Discovery of Variable-Length Time Series Patterns", journal = j-TKDD, volume = "12", number = "1", pages = "10:1--10:??", month = feb, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3051126", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:45 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "The problems of recurrent and anomalous pattern discovery in time series, e.g., motifs and discords, respectively, have received a lot of attention from researchers in the past decade. However, since the pattern search space is usually intractable, most existing detection algorithms require that the patterns have discriminative characteristics and have its length known in advance and provided as input, which is an unreasonable requirement for many real-world problems. In addition, patterns of similar structure, but of different lengths may co-exist in a time series. Addressing these issues, we have developed algorithms for variable-length time series pattern discovery that are based on symbolic discretization and grammar inference---two techniques whose combination enables the structured reduction of the search space and discovery of the candidate patterns in linear time. In this work, we present GrammarViz 3.0---a software package that provides implementations of proposed algorithms and graphical user interface for interactive variable-length time series pattern discovery. The current version of the software provides an alternative grammar inference algorithm that improves the time series motif discovery workflow, and introduces an experimental procedure for automated discretization parameter selection that builds upon the minimum cardinality maximum cover principle and aids the time series recurrent and anomalous pattern discovery.", acknowledgement = ack-nhfb, ajournal = "ACM Trans.
Knowl. Discov. Data", articleno = "10", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Datta:2018:CVC, author = "Srayan Datta and Eytan Adar", title = "{CommunityDiff}: Visualizing Community Clustering Algorithms", journal = j-TKDD, volume = "12", number = "1", pages = "11:1--11:??", month = feb, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3047009", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:45 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Community detection is an oft-used analytical function of network analysis but can be a black art to apply in practice. Grouping of related nodes is important for identifying patterns in network datasets but also notoriously sensitive to input data and algorithm selection. This is further complicated by the fact that, depending on domain and use case, the ground truth knowledge of the end-user can vary from none to complete. In this work, we present CommunityDiff, an interactive visualization system that combines visualization and active learning (AL) to support the end-user's analytical process. As the end-user interacts with the system, a continuous refinement process updates both the community labels and visualizations. CommunityDiff features a mechanism for visualizing ensemble spaces, weighted combinations of algorithm output, that can identify patterns, commonalities, and differences among multiple community detection algorithms. Among other features, CommunityDiff introduces an AL mechanism that visually indicates uncertainty about community labels to focus end-user attention and supporting end-user control that ranges from explicitly indicating the number of expected communities to merging and splitting communities. Based on this end-user input, CommunityDiff dynamically recalculates communities. 
We demonstrate the viability of our through a study of speed of end-user convergence on satisfactory community labels. As part of building CommunityDiff, we describe a design process that can be adapted to other Interactive Machine Learning applications.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "11", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yang:2018:LIC, author = "Yang Yang and Jie Tang and Juanzi Li", title = "Learning to Infer Competitive Relationships in Heterogeneous Networks", journal = j-TKDD, volume = "12", number = "1", pages = "12:1--12:??", month = feb, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3051127", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:45 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Detecting and monitoring competitors is fundamental to a company to stay ahead in the global market. Existing studies mainly focus on mining competitive relationships within a single data source, while competing information is usually distributed in multiple networks. How to discover the underlying patterns and utilize the heterogeneous knowledge to avoid biased aspects in this issue is a challenging problem. In this article, we study the problem of mining competitive relationships by learning across heterogeneous networks. We use Twitter and patent records as our data sources and statistically study the patterns behind the competitive relationships. We find that the two networks exhibit different but complementary patterns of competitions. Overall, we find that similar entities tend to be competitors, with a probability of 4 times higher than chance. 
On the other hand, in social network, we also find a 10 minutes phenomenon: when two entities are mentioned by the same user within 10 minutes, the likelihood of them being competitors is 25 times higher than chance. Based on the discovered patterns, we propose a novel Topical Factor Graph Model. Generally, our model defines a latent topic layer to bridge the Twitter network and patent network. It then employs a semi-supervised learning algorithm to classify the relationships between entities (e.g., companies or products). We test the proposed model on two real data sets and the experimental results validate the effectiveness of our model, with an average of +46\% improvement over alternative methods. Besides, we further demonstrate the competitive relationships inferred by our proposed model can be applied in the job-hopping prediction problem by achieving an average of +10.7\% improvement.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "12", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2018:PSM, author = "Boyue Wang and Yongli Hu and Junbin Gao and Yanfeng Sun and Baocai Yin", title = "Partial Sum Minimization of Singular Values Representation on {Grassmann} Manifolds", journal = j-TKDD, volume = "12", number = "1", pages = "13:1--13:??", month = feb, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3092690", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:45 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Clustering is one of the fundamental topics in data mining and pattern recognition. As a prospective clustering method, the subspace clustering has made considerable progress in recent researches, e.g., sparse subspace clustering (SSC) and low rank representation (LRR). 
However, most existing subspace clustering algorithms are designed for vectorial data from linear spaces, thus not suitable for high-dimensional data with intrinsic non-linear manifold structure. For high-dimensional or manifold data, few research pays attention to clustering problems. The purpose of clustering on manifolds tends to cluster manifold-valued data into several groups according to the manifold-based similarity metric. This article proposes an extended LRR model for manifold-valued Grassmann data that incorporates prior knowledge by minimizing partial sum of singular values instead of the nuclear norm, namely Partial Sum minimization of Singular Values Representation (GPSSVR). The new model not only enforces the global structure of data in low rank, but also retains important information by minimizing only smaller singular values. To further maintain the local structures among Grassmann points, we also integrate the Laplacian penalty with GPSSVR. The proposed model and algorithms are assessed on a public human face dataset, some widely used human action video datasets and a real scenery dataset. The experimental results show that the proposed methods obviously outperform other state-of-the-art methods.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "13", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Trevino:2018:DSE, author = "Edgar S. Garc{\'\i}a Trevi{\~n}o and Muhammad Zaid Hameed and Javier A.
Barria", title = "Data Stream Evolution Diagnosis Using Recursive Wavelet Density Estimators", journal = j-TKDD, volume = "12", number = "1", pages = "14:1--14:??", month = feb, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3106369", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:45 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Data streams are a new class of data that is becoming pervasively important in a wide range of applications, ranging from sensor networks, environmental monitoring to finance. In this article, we propose a novel framework for the online diagnosis of evolution of multidimensional streaming data that incorporates Recursive Wavelet Density Estimators into the context of Velocity Density Estimation. In the proposed framework changes in streaming data are characterized by the use of local and global evolution coefficients. In addition, we propose for the analysis of changes in the correlation structure of the data a recursive implementation of the Pearson correlation coefficient using exponential discounting. Two visualization tools, namely temporal and spatial velocity profiles, are extended in the context of the proposed framework. These are the three main advantages of the proposed method over previous approaches: (1) the memory storage required is minimal and independent of any window size; (2) it has a significantly lower computational complexity; and (3) it makes possible the fast diagnosis of data evolution at all dimensions and at relevant combinations of dimensions with only one pass of the data. With the help of the four examples, we show the framework's relevance in a change detection context and its potential capability for real world applications.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "14", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Kaushal:2018:ETP, author = "Vishal Kaushal and Manasi Patwardhan", title = "Emerging Trends in Personality Identification Using Online Social Networks --- a Literature Survey", journal = j-TKDD, volume = "12", number = "2", pages = "15:1--15:??", month = mar, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3070645", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:45 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Personality is a combination of all the attributes---behavioral, temperamental, emotional, and mental---that characterizes a unique individual. Ability to identify personalities of people has always been of great interest to the researchers due to its importance. It continues to find highly useful applications in many domains. Owing to the increasing popularity of online social networks, researchers have started looking into the possibility of predicting a user's personality from his online social networking profile, which serves as a rich source of textual as well as non-textual content published by users. In the process of creating social networking profiles, users reveal a lot about themselves both in what they share and how they say it. Studies suggest that the online social networking websites are, in fact, a relevant and valid means of communicating personality. In this article, we review these various studies reported in literature toward identification of personality using online social networks. To the best of our knowledge, this is the first reported survey of its kind at the time of submission.
We hope that our contribution, especially in summarizing the previous findings and in identifying the directions for future research in this area, would encourage researchers to do more work in this budding area.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "15", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Pandove:2018:SRC, author = "Divya Pandove and Shivani Goel and Rinkle Rani", title = "Systematic Review of Clustering High-Dimensional and Large Datasets", journal = j-TKDD, volume = "12", number = "2", pages = "16:1--16:??", month = mar, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3132088", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:45 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Technological advancement has enabled us to store and process huge amount of data in relatively short spans of time. The nature of data is rapidly changing, particularly its dimensionality is more commonly multi- and high-dimensional. There is an immediate need to expand our focus to include analysis of high-dimensional and large datasets. Data analysis is becoming a mammoth task, due to incremental increase in data volume and complexity in terms of heterogony of data. It is due to this dynamic computing environment that the existing techniques either need to be modified or discarded to handle new data in multiple high-dimensions. Data clustering is a tool that is used in many disciplines, including data mining, so that meaningful knowledge can be extracted from seemingly unstructured data. The aim of this article is to understand the problem of clustering and various approaches addressing this problem. This article discusses the process of clustering from both microviews (data treating) and macroviews (overall clustering process).
Different distance and similarity measures, which form the cornerstone of effective data clustering, are also identified. Further, an in-depth analysis of different clustering approaches focused on data mining, dealing with large-scale datasets is given. These approaches are comprehensively compared to bring out a clear differentiation among them. This article also surveys the problem of high-dimensional data and the existing approaches, that makes it more relevant. It also explores the latest trends in cluster analysis, and the real-life applications of this concept. This survey is exhaustive as it tries to cover all the aspects of clustering in the field of data mining.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "16", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2018:LSC, author = "Yixuan Li and Kun He and Kyle Kloster and David Bindel and John Hopcroft", title = "Local Spectral Clustering for Overlapping Community Detection", journal = j-TKDD, volume = "12", number = "2", pages = "17:1--17:??", month = mar, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3106370", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:45 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Large graphs arise in a number of contexts and understanding their structure and extracting information from them is an important research area. Early algorithms for mining communities have focused on global graph structure, and often run in time proportional to the size of the entire graph. As we explore networks with millions of vertices and find communities of size in the hundreds, it becomes important to shift our attention from macroscopic structure to microscopic structure in large networks. 
A growing body of work has been adopting local expansion methods in order to identify communities from a few exemplary seed members. In this article, we propose a novel approach for finding overlapping communities called Lemon (Local Expansion via Minimum One Norm). Provided with a few known seeds, the algorithm finds the community by performing a local spectral diffusion. The core idea of Lemon is to use short random walks to approximate an invariant subspace near a seed set, which we refer to as local spectra. Local spectra can be viewed as the low-dimensional embedding that captures the nodes' closeness in the local network structure. We show that Lemon's performance in detecting communities is competitive with state-of-the-art methods. Moreover, the running time scales with the size of the community rather than that of the entire graph. The algorithm is easy to implement and is highly parallelizable. We further provide theoretical analysis of the local spectral properties, bounding the measure of tightness of extracted community using the eigenvalues of graph Laplacian. We thoroughly evaluate our approach using both synthetic and real-world datasets across different domains, and analyze the empirical variations when applying our method to inherently different networks in practice. In addition, the heuristics on how the seed set quality and quantity would affect the performance are provided.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "17", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Costa:2018:MOC, author = "Gianni Costa and Riccardo Ortale", title = "Mining Overlapping Communities and Inner Role Assignments through {Bayesian} Mixed-Membership Models of Networks with Context-Dependent Interactions", journal = j-TKDD, volume = "12", number = "2", pages = "18:1--18:??", month = mar, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3106368", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:45 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Community discovery and role assignment have been recently integrated into an unsupervised approach for the exploratory analysis of overlapping communities and inner roles in networks. However, the formation of ties in these prototypical research efforts is not truly realistic, since it does not account for a fundamental aspect of link establishment in real-world networks, i.e., the explicative reasons that cause interactions among nodes. Such reasons can be interpreted as generic requirements of nodes, that are met by other nodes and essentially pertain both to the nodes themselves and to their interaction contexts (i.e., the respective communities and roles). In this article, we present two new model-based machine-learning approaches, wherein community discovery and role assignment are seamlessly integrated and simultaneously performed through approximate posterior inference in Bayesian mixed-membership models of directed networks. The devised models account for the explicative reasons governing link establishment in terms of node-specific and contextual latent interaction factors. The former are inherently characteristic of nodes, while the latter are characterizations of nodes in the context of the individual communities and roles. 
The generative process of both models assigns nodes to communities with respective roles and connects them through directed links, which are probabilistically governed by their node-specific and contextual interaction factors. The difference between the proposed models lies in the exploitation of the contextual interaction factors. More precisely, in one model, the contextual interaction factors have the same impact on link generation. In the other model, the contextual interaction factors are weighted by the extent of involvement of the linked nodes in the respective communities and roles. We develop MCMC algorithms implementing approximate posterior inference and parameter estimation within our models. Finally, we conduct an intensive comparative experimentation, which demonstrates their superiority in community compactness and link prediction on various real-world and synthetic networks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "18", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Long:2018:PMS, author = "Cheng Long and Raymond Chi-Wing Wong and Victor Junqiu Wei", title = "Profit Maximization with Sufficient Customer Satisfactions", journal = j-TKDD, volume = "12", number = "2", pages = "19:1--19:??", month = mar, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3110216", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:45 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "In many commercial campaigns, we observe that there exists a tradeoff between the number of customers satisfied by the company and the profit gained. Merely satisfying as many customers as possible or maximizing the profit is not desirable. 
To this end, in this article, we propose a new problem called $k$-Satisfiability Assignment for Maximizing the Profit ($k$-SAMP), where $k$ is a user parameter and a non-negative integer. Given a set $P$ of products and a set $O$ of customers, $k$-SAMP is to find an assignment between $P$ and $O$ such that at least $k$ customers are satisfied in the assignment and the profit incurred by this assignment is maximized. Although we find that this problem is closely related to two classic computer science problems, namely maximum weight matching and maximum matching, the techniques developed for these classic problems cannot be adapted to our $k$-SAMP problem. In this work, we design a novel algorithm called Adjust for the $k$-SAMP problem. Given an assignment $A$, Adjust iteratively increases the profit of $A$ by adjusting some appropriate matches in $A$ while keeping at least $k$ customers satisfied in $A$. We prove that Adjust returns a global optimum. Extensive experiments were conducted that verified the efficiency of Adjust.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "19", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ramezani:2018:CDU, author = "Maryam Ramezani and Ali Khodadadi and Hamid R. Rabiee", title = "Community Detection Using Diffusion Information", journal = j-TKDD, volume = "12", number = "2", pages = "20:1--20:??", month = mar, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3110215", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:45 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Community detection in social networks has become a popular topic of research during the last decade. There exist a variety of algorithms for modularizing the network graph into different communities.
However, they mostly assume that partial or complete information of the network graphs are available that is not feasible in many cases. In this article, we focus on detecting communities by exploiting their diffusion information. To this end, we utilize the Conditional Random Fields (CRF) to discover the community structures. The proposed method, community diffusion (CoDi), does not require any prior knowledge about the network structure or specific properties of communities. Furthermore, in contrast to the structure-based community detection methods, this method is able to identify the hidden communities. The experimental results indicate considerable improvements in detecting communities based on accuracy, scalability, and real cascade information measures.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "20", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chiasserini:2018:ACS, author = "Carla-Fabiana Chiasserini and Michele Garetto and Emilio Leonardi", title = "De-anonymizing Clustered Social Networks by Percolation Graph Matching", journal = j-TKDD, volume = "12", number = "2", pages = "21:1--21:??", month = mar, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3127876", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:45 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Online social networks offer the opportunity to collect a huge amount of valuable information about billions of users. The analysis of this data by service providers and unintended third parties are posing serious threats to user privacy. In particular, recent work has shown that users participating in more than one online social network can be identified based only on the structure of their links to other users.
An effective tool to de-anonymize social network users is represented by graph matching algorithms. Indeed, by exploiting a sufficiently large set of seed nodes, a percolation process can correctly match almost all nodes across the different social networks. In this article, we show the crucial role of clustering, which is a relevant feature of social network graphs (and many other systems). Clustering has both the effect of making matching algorithms more prone to errors, and the potential to greatly reduce the number of seeds needed to trigger percolation. We show these facts by considering a fairly general class of random geometric graphs with variable clustering level. We assume that seeds can be identified in particular sub-regions of the network graph, while no a priori knowledge about the location of the other nodes is required. Under these conditions, we show how clever algorithms can achieve surprisingly good performance while limiting the number of matching errors.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "21", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhao:2018:JRL, author = "Wayne Xin Zhao and Feifan Fan and Ji-Rong Wen and Edward Y. Chang", title = "Joint Representation Learning for Location-Based Social Networks with Multi-Grained Sequential Contexts", journal = j-TKDD, volume = "12", number = "2", pages = "22:1--22:??", month = mar, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3127875", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:45 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "This article studies the problem of learning effective representations for Location-Based Social Networks (LBSN), which is useful in many tasks such as location recommendation and link prediction. 
Existing network embedding methods mainly focus on capturing topology patterns reflected in social connections, while check-in sequences, the most important data type in LBSNs, are not directly modeled by these models. In this article, we propose a representation learning method for LBSNs called as JRLM++, which models check-in sequences together with social connections. To capture sequential relatedness, JRLM++ characterizes two levels of sequential contexts, namely fine-grained and coarse-grained contexts. We present a learning algorithm tailored to the hierarchical architecture of the proposed model. We conduct extensive experiments on two important applications using real-world datasets. The experimental results demonstrate the superiority of our model. The proposed model can generate effective representations for both users and locations in the same embedding space, which can be further utilized to improve multiple LBSN tasks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "22", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Hu:2018:CFT, author = "Guang-Neng Hu and Xin-Yu Dai and Feng-Yu Qiu and Rui Xia and Tao Li and Shu-Jian Huang and Jia-Jun Chen", title = "Collaborative Filtering with Topic and Social Latent Factors Incorporating Implicit Feedback", journal = j-TKDD, volume = "12", number = "2", pages = "23:1--23:??", month = mar, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3127873", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:45 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Recommender systems (RSs) provide an effective way of alleviating the information overload problem by selecting personalized items for different users. 
Latent factors-based collaborative filtering (CF) has become the popular approaches for RSs due to its accuracy and scalability. Recently, online social networks and user-generated content provide diverse sources for recommendation beyond ratings. Although social matrix factorization (Social MF) and topic matrix factorization (Topic MF) successfully exploit social relations and item reviews, respectively; both of them ignore some useful information. In this article, we investigate the effective data fusion by combining the aforementioned approaches. First, we propose a novel model MR3 to jointly model three sources of information (i.e., ratings, item reviews, and social relations) effectively for rating prediction by aligning the latent factors and hidden topics. Second, we incorporate the implicit feedback from ratings into the proposed model to enhance its capability and to demonstrate its flexibility. We achieve more accurate rating prediction on real-life datasets over various state-of-the-art methods. Furthermore, we measure the contribution from each of the three data sources and the impact of implicit feedback from ratings, followed by the sensitivity analysis of hyperparameters. Empirical studies demonstrate the effectiveness and efficacy of our proposed model and its extension.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "23", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Perozzi:2018:DCA, author = "Bryan Perozzi and Leman Akoglu", title = "Discovering Communities and Anomalies in Attributed Graphs: Interactive Visual Exploration and Summarization", journal = j-TKDD, volume = "12", number = "2", pages = "24:1--24:??", month = mar, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3139241", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:45 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Given a network with node attributes, how can we identify communities and spot anomalies? How can we characterize, describe, or summarize the network in a succinct way? Community extraction requires a measure of quality for connected subgraphs (e.g., social circles). Existing subgraph measures, however, either consider only the connectedness of nodes inside the community and ignore the cross-edges at the boundary (e.g., density) or only quantify the structure of the community and ignore the node attributes (e.g., conductance). In this work, we focus on node-attributed networks and introduce: (1) a new measure of subgraph quality for attributed communities called normality, (2) a community extraction algorithm that uses normality to extract communities and a few characterizing attributes per community, and (3) a summarization and interactive visualization approach for attributed graph exploration. More specifically, (1) we first introduce a new measure to quantify the normality of an attributed subgraph. Our normality measure carefully utilizes structure and attributes together to quantify both the internal consistency and external separability. 
We then formulate an objective function to automatically infer a few attributes (called the ``focus'') and respective attribute weights, so as to maximize the normality score of a given subgraph. Most notably, unlike many other approaches, our measure allows for many cross-edges as long as they can be ``exonerated;'' i.e., either (i) are expected under a null graph model, and/or (ii) their boundary nodes do not exhibit the focus attributes. Next, (2) we propose AMEN (for Attributed Mining of Entity Networks), an algorithm that simultaneously discovers the communities and their respective focus in a given graph, with a goal to maximize the total normality. Communities for which a focus that yields high normality cannot be found are considered low quality or anomalous. Last, (3) we formulate a summarization task with a multi-criteria objective, which selects a subset of the communities that (i) cover the entire graph well, are (ii) high quality and (iii) diverse in their focus attributes. We further design an interactive visualization interface that presents the communities to a user in an interpretable, user-friendly fashion. The user can explore all the communities, analyze various algorithm-generated summaries, as well as devise their own summaries interactively to characterize the network in a succinct way. As the experiments on real-world attributed graphs show, our proposed approaches effectively find anomalous communities and outperform several existing measures and methods, such as conductance, density, OddBall, and SODA. We also conduct extensive user studies to measure the capability and efficiency that our approach provides to the users toward network summarization, exploration, and sensemaking.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "24", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Bonab:2018:GGO, author = "Hamed R. 
Bonab and Fazli Can", title = "{GOOWE}: Geometrically Optimum and Online-Weighted Ensemble Classifier for Evolving Data Streams", journal = j-TKDD, volume = "12", number = "2", pages = "25:1--25:??", month = mar, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3139240", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:45 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Designing adaptive classifiers for an evolving data stream is a challenging task due to the data size and its dynamically changing nature. Combining individual classifiers in an online setting, the ensemble approach, is a well-known solution. It is possible that a subset of classifiers in the ensemble outperforms others in a time-varying fashion. However, optimum weight assignment for component classifiers is a problem, which is not yet fully addressed in online evolving environments. We propose a novel data stream ensemble classifier, called Geometrically Optimum and Online-Weighted Ensemble (GOOWE), which assigns optimum weights to the component classifiers using a sliding window containing the most recent data instances. We map vote scores of individual classifiers and true class labels into a spatial environment. Based on the Euclidean distance between vote scores and ideal-points, and using the linear least squares (LSQ) solution, we present a novel, dynamic, and online weighting approach. While LSQ is used for batch mode ensemble classifiers, it is the first time that we adapt and use it for online environments by providing a spatial modeling of online ensembles. In order to show the robustness of the proposed algorithm, we use real-world datasets and synthetic data generators using the Massive Online Analysis (MOA) libraries. First, we analyze the impact of our weighting system on prediction accuracy through two scenarios. 
Second, we compare GOOWE with eight state-of-the-art ensemble classifiers in a comprehensive experimental environment. Our experiments show that GOOWE provides improved reactions to different types of concept drift compared to our baselines. The statistical tests indicate a significant improvement in accuracy, with conservative time and memory requirements.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "25", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Xie:2018:ERP, author = "Hong Xie and Richard T. B. Ma and John C. S. Lui", title = "Enhancing Reputation via Price Discounts in E-Commerce Systems: a Data-Driven Approach", journal = j-TKDD, volume = "12", number = "3", pages = "26:1--26:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3154417", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:46 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Reputation systems have become an indispensable component of modern E-commerce systems, as they help buyers make informed decisions in choosing trustworthy sellers. To attract buyers and increase the transaction volume, sellers need to earn reasonably high reputation scores. This process usually takes a substantial amount of time. To accelerate this process, sellers can provide price discounts to attract users, but the underlying difficulty is that sellers have no prior knowledge on buyers' preferences over price discounts. In this article, we develop an online algorithm to infer the optimal discount rate from data. We first formulate an optimization framework to select the optimal discount rate given buyers' discount preferences, which is a tradeoff between the short-term profit and the ramp-up time (for reputation). 
We then derive the closed-form optimal discount rate, which gives us key insights in applying a stochastic bandits framework to infer the optimal discount rate from the transaction data with regret upper bounds. We show that the computational complexity of evaluating the performance metrics is infeasibly high, and therefore, we develop efficient randomized algorithms with guaranteed performance to approximate them. Finally, we conduct experiments on a dataset crawled from eBay. Experimental results show that our framework can trade 60\% of the short-term profit for reducing the ramp-up time by 40\%. This reduction in the ramp-up time can increase the long-term profit of a seller by at least 20\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "26", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Belcastro:2018:GRA, author = "Loris Belcastro and Fabrizio Marozzo and Domenico Talia and Paolo Trunfio", title = "{G-RoI}: Automatic Region-of-Interest Detection Driven by Geotagged Social Media Data", journal = j-TKDD, volume = "12", number = "3", pages = "27:1--27:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3154411", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:46 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Geotagged data gathered from social media can be used to discover interesting locations visited by users called Places-of-Interest (PoIs). Since a PoI is generally identified by the geographical coordinates of a single point, it is hard to match it with user trajectories. Therefore, it is useful to define an area, called Region-of-Interest (RoI), to represent the boundaries of the PoI's area. RoI mining techniques are aimed at discovering RoIs from PoIs and other data.
Existing RoI mining techniques are based on three main approaches: predefined shapes, density-based clustering, and grid-based aggregation. This article proposes G-RoI, a novel RoI mining technique that exploits the indications contained in geotagged social media items to discover RoIs with a high accuracy. Experiments performed over a set of PoIs in Rome and Paris using social media geotagged data, demonstrate that G-RoI in most cases achieves better results than existing techniques. In particular, the mean F$_1$ score is 0.34 higher than that obtained with the well-known DBSCAN algorithm in Rome RoIs and 0.23 higher in Paris RoIs.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "27", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Shin:2018:FAF, author = "Kijung Shin and Bryan Hooi and Christos Faloutsos", title = "Fast, Accurate, and Flexible Algorithms for Dense Subtensor Mining", journal = j-TKDD, volume = "12", number = "3", pages = "28:1--28:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3154414", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:46 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Given a large-scale and high-order tensor, how can we detect dense subtensors in it? Can we spot them in near-linear time but with quality guarantees? Extensive previous work has shown that dense subtensors, as well as dense subgraphs, indicate anomalous or fraudulent behavior (e.g., lockstep behavior in social networks). However, available algorithms for detecting dense subtensors are not satisfactory in terms of speed, accuracy, and flexibility. In this work, we propose two algorithms, called M-Zoom and M-Biz, for fast and accurate dense-subtensor detection with various density measures. 
M-Zoom gives a lower bound on the density of detected subtensors, while M-Biz guarantees the local optimality of detected subtensors. M-Zoom and M-Biz can be combined, giving the following advantages: (1) Scalable: scale near-linearly with all aspects of tensors and are up to 114$ \times $ faster than state-of-the-art methods with similar accuracy, (2) Provably accurate: provide a guarantee on the lowest density and local optimality of the subtensors they find, (3) Flexible: support multi-subtensor detection and size bounds as well as diverse density measures, and (4) Effective: successfully detected edit wars and bot activities in Wikipedia, and spotted network attacks from a TCP dump with near-perfect accuracy (AUC = 0.98).", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "28", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liang:2018:PRA, author = "Jiongqian Liang and Deepak Ajwani and Patrick K. Nicholson and Alessandra Sala and Srinivasan Parthasarathy", title = "Prioritized Relationship Analysis in Heterogeneous Information Networks", journal = j-TKDD, volume = "12", number = "3", pages = "29:1--29:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3154401", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:46 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "An increasing number of applications are modeled and analyzed in network form, where nodes represent entities of interest and edges represent interactions or relationships between entities. Commonly, such relationship analysis tools assume homogeneity in both node type and edge type. Recent research has sought to redress the assumption of homogeneity and focused on mining heterogeneous information networks (HINs) where both nodes and edges can be of different types. 
Building on such efforts, in this work, we articulate a novel approach for mining relationships across entities in such networks while accounting for user preference over relationship type and interestingness metric. We formalize the problem as a top-$k$ lightest paths problem, contextualized in a real-world communication network, and seek to find the $k$ most interesting path instances matching the preferred relationship type. Our solution, PROphetic HEuristic Algorithm for Path Searching (PRO-HEAPS), leverages a combination of novel graph preprocessing techniques, well-designed heuristics and the venerable $ A^* $ search algorithm. We run our algorithm on real-world large-scale graphs and show that our algorithm significantly outperforms a wide variety of baseline approaches with speedups as large as 100X. To widen the range of applications, we also extend PRO-HEAPS to (i) support relationship analysis between two groups of entities and (ii) allow pattern path in the query to contain logical statements with operators AND, OR, NOT, and wild-card ``.''. We run experiments using this generalized version of PRO-HEAPS and demonstrate that the advantage of PRO-HEAPS becomes even more pronounced for these general cases. Furthermore, we conduct a comprehensive analysis to study how the performance of PRO-HEAPS varies with respect to various attributes of the input HIN. We finally conduct a case study to demonstrate valuable applications of our algorithm.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "29", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Huang:2018:WTC, author = "Hong Huang and Yuxiao Dong and Jie Tang and Hongxia Yang and Nitesh V.
Chawla and Xiaoming Fu", title = "Will Triadic Closure Strengthen Ties in Social Networks?", journal = j-TKDD, volume = "12", number = "3", pages = "30:1--30:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3154399", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:46 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "The social triad---a group of three people---is one of the simplest and most fundamental social groups. Extensive network and social theories have been developed to understand its structure, such as triadic closure and social balance. Over the course of a triadic closure---the transition from two ties to three among three users, the strength dynamics of its social ties, however, are much less well understood. Using two dynamic networks from social media and mobile communication, we examine how the formation of the third tie in a triad affects the strength of the existing two ties. Surprisingly, we find that in about 80\% social triads, the strength of the first two ties is weakened although averagely the tie strength in the two networks maintains an increasing or stable trend. We discover that (1) the decrease in tie strength among three males is more sharply than that among females, and (2) the tie strength between celebrities is more likely to be weakened as the closure of a triad than those between ordinary people. Furthermore, we formalize a triadic tie strength dynamics prediction problem to infer whether social ties of a triad will become weakened after its closure. We propose a TRIST method---a kernel density estimation (KDE)-based graphical model---to solve the problem by incorporating user demographics, temporal effects, and structural information. Extensive experiments demonstrate that TRIST offers a greater than 82\% potential predictability for inferring triadic tie strength dynamics in both networks.
The leveraging of the KDE and structural correlations enables TRIST to outperform baselines by up to 30\% in terms of F1-score.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "30", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chen:2018:LSB, author = "Guangyong Chen and Fengyuan Zhu and Pheng Ann Heng", title = "Large-Scale {Bayesian} Probabilistic Matrix Factorization with Memo-Free Distributed Variational Inference", journal = j-TKDD, volume = "12", number = "3", pages = "31:1--31:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3161886", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:46 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Bayesian Probabilistic Matrix Factorization (BPMF) is a powerful model in many dyadic data prediction problems, especially the applications of Recommender system. However, its poor scalability has limited its wide applications on massive data. Based on the conditional independence property of observed entries in BPMF model, we propose a novel distributed memo-free variational inference method for large-scale matrix factorization problems. Compared with the state-of-the-art methods, the proposed method is favored for several attractive properties. Specifically, it does not require tuning of learning rate carefully, shuffling the training set at each iteration, or storing massive redundant variables, and can introduce new agents into the computations on the fly. We conduct extensive experiments on both synthetic and real-world datasets. The experimental results show that our method can converge significantly faster with better prediction performance than alternative algorithms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "31", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2018:MVL, author = "Sheng Li and Ming Shao and Yun Fu", title = "Multi-View Low-Rank Analysis with Applications to Outlier Detection", journal = j-TKDD, volume = "12", number = "3", pages = "32:1--32:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3168363", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:46 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Detecting outliers or anomalies is a fundamental problem in various machine learning and data mining applications. Conventional outlier detection algorithms are mainly designed for single-view data. Nowadays, data can be easily collected from multiple views, and many learning tasks such as clustering and classification have benefited from multi-view data. However, outlier detection from multi-view data is still a very challenging problem, as the data in multiple views usually have more complicated distributions and exhibit inconsistent behaviors. To address this problem, we propose a multi-view low-rank analysis (MLRA) framework for outlier detection in this article. MLRA pursuits outliers from a new perspective, robust data representation. It contains two major components. First, the cross-view low-rank coding is performed to reveal the intrinsic structures of data. In particular, we formulate a regularized rank-minimization problem, which is solved by an efficient optimization algorithm. Second, the outliers are identified through an outlier score estimation procedure. Different from the existing multi-view outlier detection methods, MLRA is able to detect two different types of outliers from multiple views simultaneously. 
To this end, we design a criterion to estimate the outlier scores by analyzing the obtained representation coefficients. Moreover, we extend MLRA to tackle the multi-view group outlier detection problem. Extensive evaluations on seven UCI datasets, the MovieLens, the USPS-MNIST, and the WebKB datasets demonstrate that our approach outperforms several state-of-the-art outlier detection methods.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "32", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Altowim:2018:PAP, author = "Yasser Altowim and Dmitri V. Kalashnikov and Sharad Mehrotra", title = "{ProgressER}: Adaptive Progressive Approach to Relational Entity Resolution", journal = j-TKDD, volume = "12", number = "3", pages = "33:1--33:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3154410", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:46 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Entity resolution (ER) is the process of identifying which entities in a dataset refer to the same real-world object. In relational ER, the dataset consists of multiple entity-sets and relationships among them. Such relationships cause the resolution of some entities to influence the resolution of other entities. For instance, consider a relational dataset that consists of a set of research paper entities and a set of venue entities. In such a dataset, deciding that two research papers are the same may trigger the fact that their venues are also the same. This article proposes a progressive approach to relational ER, named ProgressER, that aims to produce the highest quality result given a constraint on the resolution budget, specified by the user. 
Such a progressive approach is useful for many emerging analytical applications that require low latency response (and thus cannot tolerate delays caused by cleaning the entire dataset) and/or in situations where the underlying resources are constrained or costly to use. To maximize the quality of the result, ProgressER follows an adaptive strategy that periodically monitors and reassesses the resolution progress to determine which parts of the dataset should be resolved next and how they should be resolved. More specifically, ProgressER divides the input budget into several resolution windows and analyzes the resolution progress at the beginning of each window to generate a resolution plan for the current window. A resolution plan specifies which blocks of entities and which entity pairs within blocks need to be resolved during the plan execution phase of that window. In addition, ProgressER specifies, for each identified pair of entities, the order in which the similarity functions should be applied on the pair. Such an order plays a significant role in reducing the overall cost because applying the first few functions in this order might be sufficient to resolve the pair. The empirical evaluation of ProgressER demonstrates its significant advantage in terms of progressiveness over the traditional ER techniques for the given problem settings.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "33", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Bakerman:2018:TGH, author = "Jordan Bakerman and Karl Pazdernik and Alyson Wilson and Geoffrey Fairchild and Rian Bahran", title = "{Twitter} Geolocation: a Hybrid Approach", journal = j-TKDD, volume = "12", number = "3", pages = "34:1--34:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3178112", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:46 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Geotagging Twitter messages is an important tool for event detection and enrichment. Despite the availability of both social media content and user network information, these two features are generally utilized separately in the methodology. In this article, we create a hybrid method that uses Twitter content and network information jointly as model features. We use Gaussian mixture models to map the raw spatial distribution of the model features to a predicted field. This approach is scalable to large datasets and provides a natural representation of model confidence. Our method is tested against other approaches and we achieve greater prediction accuracy. The model also improves both precision and coverage.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "34", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Moreno:2018:TKP, author = "Sebastian Moreno and Jennifer Neville and Sergey Kirshner", title = "Tied {Kronecker} Product Graph Models to Capture Variance in Network Populations", journal = j-TKDD, volume = "12", number = "3", pages = "35:1--35:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3161885", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:46 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Much of the past work on mining and modeling networks has focused on understanding the observed properties of single example graphs. However, in many real-life applications it is important to characterize the structure of populations of graphs. In this work, we analyze the distributional properties of probabilistic generative graph models (PGGMs) for network populations. PGGMs are statistical methods that model the network distribution and match common characteristics of real-world networks. Specifically, we show that most PGGMs cannot reflect the natural variability in graph properties observed across multiple networks because their edge generation process assumes independence among edges. Then, we propose the mixed Kronecker Product Graph Model (mKPGM), a scalable generalization of KPGMs that uses tied parameters to increase the variability of the sampled networks, while preserving the edge probabilities in expectation. We compare mKPGM to several other graph models. The results show that learned mKPGMs accurately represent the characteristics of real-world networks, while also effectively capturing the natural variability in network structure.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "35", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yang:2018:FFR, author = "Pei Yang and Qi Tan and Jingrui He", title = "Function-on-Function Regression with Mode-Sparsity Regularization", journal = j-TKDD, volume = "12", number = "3", pages = "36:1--36:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3178113", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:46 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Functional data is ubiquitous in many domains, such as healthcare, social media, manufacturing process, sensor networks, and so on. The goal of function-on-function regression is to build a mapping from functional predictors to functional response. In this article, we propose a novel function-on-function regression model based on mode-sparsity regularization. The main idea is to represent the regression coefficient function between predictor and response as the double expansion of basis functions, and then use a mode-sparsity regularization to automatically filter out irrelevant basis functions for both predictors and responses. The proposed approach is further extended to the tensor version to accommodate multiple functional predictors. While allowing the dimensionality of the regression weight matrix or tensor to be relatively large, the mode-sparsity regularized model facilitates the multi-way shrinking of basis functions for each mode. The proposed mode-sparsity regularization covers a wide spectrum of sparse models for function-on-function regression. The resulting optimization problem is challenging due to the non-smooth property of the mode-sparsity regularization. We develop an efficient algorithm to solve the problem, which works in an iterative update fashion, and converges to the global optimum. 
Furthermore, we analyze the generalization performance of the proposed method and derive an upper bound for the consistency between the recovered function and the underlying true function. The effectiveness of the proposed approach is verified on benchmark functional datasets in various domains.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "36", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Khodadadi:2018:CTU, author = "Ali Khodadadi and Seyed Abbas Hosseini and Erfan Tavakoli and Hamid R. Rabiee", title = "Continuous-Time User Modeling in Presence of Badges: a Probabilistic Approach", journal = j-TKDD, volume = "12", number = "3", pages = "37:1--37:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3162050", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:46 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "User modeling plays an important role in delivering customized web services to the users and improving their engagement. However, most user models in the literature do not explicitly consider the temporal behavior of users. More recently, continuous-time user modeling has gained considerable attention and many user behavior models have been proposed based on temporal point processes. However, typical point process-based models often considered the impact of peer influence and content on the user participation and neglected other factors. Gamification elements are among those factors that are neglected, while they have a strong impact on user participation in online services. In this article, we propose interdependent multi-dimensional temporal point processes that capture the impact of badges on user participation besides the peer influence and content factors. 
We extend the proposed processes to model user actions over the community-based question and answering websites, and propose an inference algorithm based on Variational-Expectation Maximization that can efficiently learn the model parameters. Extensive experiments on both synthetic and real data gathered from Stack Overflow show that our inference algorithm learns the parameters efficiently and the proposed method can better predict the user behavior compared to the alternatives.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "37", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Peng:2018:MEO, author = "Min Peng and Jiahui Zhu and Hua Wang and Xuhui Li and Yanchun Zhang and Xiuzhen Zhang and Gang Tian", title = "Mining Event-Oriented Topics in Microblog Stream with Unsupervised Multi-View Hierarchical Embedding", journal = j-TKDD, volume = "12", number = "3", pages = "38:1--38:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3173044", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:46 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "This article presents an unsupervised multi-view hierarchical embedding (UMHE) framework to sufficiently reveal the intrinsic topical knowledge in social events. Event-oriented topics are highly related to such events as it can provide explicit descriptions of what have happened in social community. In many real-world cases, however, it is difficult to include all attributes of microblogs, more often, textual aspects only are available. Traditional topic modelling methods have failed to generate event-oriented topics with the textual aspects, since the inherent relations between topics are often overlooked in these methods. 
Meanwhile, the metrics in original word vocabulary space might not effectively capture semantic distances. Our UMHE framework overcomes the severe information deficiency and poor feature representation. The UMHE first develops a multi-view Bayesian rose tree to preliminarily generate prior knowledge for latent topics and their relations. With such prior knowledge, we design an unsupervised translation-based hierarchical embedding method to make a better representation of these latent topics. By applying self-adaptive spectral clustering on the embedding space and the original space concomitantly, we eventually extract event-oriented topics in word distributions to express social events. Our framework is purely data-driven and unsupervised, without any external knowledge. Experimental results on TREC Tweets2011 dataset and Sina Weibo dataset demonstrate that the UMHE framework can construct hierarchical structure with high fitness, but also yield topic embeddings with salient semantics; therefore, it can derive event-oriented topics with meaningful descriptions.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "38", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Toth:2018:GDT, author = "Edward Toth and Sanjay Chawla", title = "{GT$ \Delta $}: Detecting Temporal Changes in Group Stochastic Processes", journal = j-TKDD, volume = "12", number = "4", pages = "39:1--39:??", month = jul, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3183346", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:46 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Given a portfolio of stocks or a series of frames in a video how do we detect significant changes in a group of values for real-time applications? 
In this article, we formalize the problem of sequentially detecting temporal changes in a group of stochastic processes. As a solution to this particular problem, we propose the group temporal change (GT$ \Delta $) algorithm, a simple yet effective technique for the sequential detection of significant changes in a variety of statistical properties of a group over time. Due to the flexible framework of the GT$ \Delta $ algorithm, a domain expert is able to select one or more statistical properties that they are interested in monitoring. The usefulness of our proposed algorithm is also demonstrated against state-of-the-art techniques on synthetically generated data as well as on two real-world applications; a portfolio of healthcare stocks over a 20 year period and a video monitoring the activity of our Sun.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "39", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Xie:2018:SNM, author = "Wei Xie and Feida Zhu and Jing Xiao and Jianzong Wang", title = "Social Network Monitoring for Bursty Cascade Detection", journal = j-TKDD, volume = "12", number = "4", pages = "40:1--40:??", month = jul, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3178048", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:46 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Social network services have become important and efficient platforms for users to share all kinds of information. The capability to monitor user-generated information and detect bursts from information diffusions in these social networks brings value to a wide range of real-life applications, such as viral marketing. However, in reality, as a third party, there is always a cost for gathering information from each user or so-called social network sensor. 
The question then arises how to select a budgeted set of social network sensors to form the data stream for burst detection without compromising the detection performance. In this article, we present a general sensor selection solution for different burst detection approaches. We formulate this problem as a constraint satisfaction problem that has high computational complexity. To reduce the computational cost, we first reduce most of the constraints by making use of the fact that bursty cascades are rare among the whole population. We then transform the problem into an Linear Programming (LP) problem. Furthermore, we use the sub-gradient method instead of the standard simplex method or interior-point method to solve the LP problem, which makes it possible for our solution to scale up to large social networks. Evaluating our solution on millions of real information cascades, we demonstrate both the effectiveness and efficiency of our approach.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "40", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chen:2018:MGC, author = "Xiaowei Chen and John C. S. Lui", title = "Mining Graphlet Counts in Online Social Networks", journal = j-TKDD, volume = "12", number = "4", pages = "41:1--41:??", month = jul, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3182392", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:46 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Counting subgraphs is a fundamental analysis task for online social networks (OSNs). Given the sheer size and restricted access of OSN, efficient computation of subgraph counts is highly challenging. 
Although a number of algorithms have been proposed to estimate the relative counts of subgraphs in OSNs with restricted access, there are only few works which try to solve a more general problem, i.e., counting subgraph frequencies. In this article, we propose an efficient random walk-based framework to estimate the subgraph counts. Our framework generates samples by leveraging consecutive steps of the random walk as well as by observing neighbors of visited nodes. Using the importance sampling technique, we derive unbiased estimators of the subgraph counts. To make better use of the degree information of visited nodes, we also design improved estimators, which increases the accuracy of the estimation with no additional cost. We conduct extensive experimental evaluation on real-world OSNs to confirm our theoretical claims. The experiment results show that our estimators are unbiased, accurate, efficient, and better than the state-of-the-art algorithms. For the Weibo graph with more than 58 million nodes, our method produces estimate of triangle count with an error less than 5\% using only 20,000 sampled nodes. Detailed comparison with the state-of-the-art methods demonstrates that our algorithm is 2--10 times more accurate.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "41", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2018:CGM, author = "Hongfu Liu and Yun Fu", title = "Consensus Guided Multi-View Clustering", journal = j-TKDD, volume = "12", number = "4", pages = "42:1--42:??", month = jul, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3182384", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:46 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "In recent decades, tremendous emerging techniques thrive the artificial intelligence field due to the increasing collected data captured from multiple sensors. These multi-view data provide more rich information than traditional single-view data. Fusing heterogeneous information for certain tasks is a core part of multi-view learning, especially for multi-view clustering. Although numerous multi-view clustering algorithms have been proposed, most scholars focus on finding the common space of different views, but unfortunately ignore the benefits from partition level by ensemble clustering. For ensemble clustering, however, there is no interaction between individual partitions from each view and the final consensus one. To fill the gap, we propose a Consensus Guided Multi-View Clustering (CMVC) framework, which incorporates the generation of basic partitions from each view and fusion of consensus clustering in an interactive way, i.e., the consensus clustering guides the generation of basic partitions, and high quality basic partitions positively contribute to the consensus clustering as well. We design a non-trivial optimization solution to formulate CMVC into two iterative $k$-means clusterings by an approximate calculation. 
In addition, the generalization of CMVC provides a rich feasibility for different scenarios, and the extension of CMVC with incomplete multi-view clustering further validates the effectiveness for real-world applications. Extensive experiments demonstrate the advantages of CMVC over other widely used multi-view clustering methods in terms of cluster validity, and the robustness of CMVC to some important parameters and incomplete multi-view data.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "42", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chen:2018:BGD, author = "Hung-Hsuan Chen", title = "{Behavior2Vec}: Generating Distributed Representations of Users' Behaviors on Products for Recommender Systems", journal = j-TKDD, volume = "12", number = "4", pages = "43:1--43:??", month = jul, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3184454", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:46 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Most studies on recommender systems target at increasing the click through rate, and hope that the number of orders will increase as well. We argue that clicking and purchasing an item are different behaviors. Thus, we should probably apply different strategies for different objectives, e.g., increase the click through rate, or increase the order rate. In this article, we propose to generate the distributed representations of users' viewing and purchasing behaviors on an e-commerce website. By leveraging on the cosine distance between the distributed representations of the behaviors on items under different contexts, we can predict a user's next clicking or purchasing item more precisely, compared to several baseline methods. 
Perhaps more importantly, we found that the distributed representations may help discover interesting analogies among the products. We may utilize such analogies to explain how two products are related, and eventually apply different recommendation strategies under different scenarios. We developed the Behavior2Vec library for demonstration. The library can be accessed at https://github.com/ncu-dart/behavior2vec/.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "43", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Saha:2018:EMO, author = "Sriparna Saha and Sayantan Mitra and Stefan Kramer", title = "Exploring Multiobjective Optimization for Multiview Clustering", journal = j-TKDD, volume = "12", number = "4", pages = "44:1--44:??", month = jul, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3182181", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:46 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "We present a new multiview clustering approach based on multiobjective optimization. In contrast to existing clustering algorithms based on multiobjective optimization, it is generally applicable to data represented by two or more views and does not require specifying the number of clusters a priori. The approach builds upon the search capability of a multiobjective simulated annealing based technique, AMOSA, as the underlying optimization technique. In the first version of the proposed approach, an internal cluster validity index is used to assess the quality of different partitionings obtained using different views. A new way of checking the compatibility of these different partitionings is also proposed and this is used as another objective function. A new encoding strategy and some new mutation operators are introduced. 
Finally, a new way of computing a consensus partitioning from multiple individual partitions obtained on multiple views is proposed. As a baseline and for comparison, two multiobjective based ensemble clustering techniques are proposed to combine the outputs of different simple clustering approaches. The efficacy of the proposed clustering methods is shown for partitioning several real-world datasets having multiple views. To show the practical usefulness of the method, we present results on web-search result clustering, where the task is to find a suitable partitioning of web snippets.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "44", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wu:2018:GRS, author = "Hao Wu and Yue Ning and Prithwish Chakraborty and Jilles Vreeken and Nikolaj Tatti and Naren Ramakrishnan", title = "Generating Realistic Synthetic Population Datasets", journal = j-TKDD, volume = "12", number = "4", pages = "45:1--45:??", month = jul, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3182383", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:46 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Modern studies of societal phenomena rely on the availability of large datasets capturing attributes and activities of synthetic, city-level, populations. For instance, in epidemiology, synthetic population datasets are necessary to study disease propagation and intervention measures before implementation. In social science, synthetic population datasets are needed to understand how policy decisions might affect preferences and behaviors of individuals. In public health, synthetic population datasets are necessary to capture diagnostic and procedural characteristics of patient records without violating confidentialities of individuals. 
To generate such datasets over a large set of categorical variables, we propose the use of the maximum entropy principle to formalize a generative model such that in a statistically well-founded way we can optimally utilize given prior information about the data, and are unbiased otherwise. An efficient inference algorithm is designed to estimate the maximum entropy model, and we demonstrate how our approach is adept at estimating underlying data distributions. We evaluate this approach against both simulated data and US census datasets, and demonstrate its feasibility using an epidemic simulation application.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "45", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{LaFond:2018:DSC, author = "Timothy {La Fond} and Jennifer Neville and Brian Gallagher", title = "Designing Size Consistent Statistics for Accurate Anomaly Detection in Dynamic Networks", journal = j-TKDD, volume = "12", number = "4", pages = "46:1--46:??", month = jul, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3185059", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:46 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "An important task in network analysis is the detection of anomalous events in a network time series. These events could merely be times of interest in the network timeline or they could be examples of malicious activity or network malfunction. Hypothesis testing using network statistics to summarize the behavior of the network provides a robust framework for the anomaly detection decision process. Unfortunately, choosing network statistics that are dependent on confounding factors like the total number of nodes or edges can lead to incorrect conclusions (e.g., false positives and false negatives). 
In this article, we describe the challenges that face
                 anomaly detection in dynamic network streams regarding
                 confounding factors. We also provide two solutions to
                 avoiding error due to confounding factors: the first is
                 a randomization testing method that controls for
                 confounding factors, and the second is a set of
                 size-consistent network statistics that avoid
                 confounding due to the most common factors, edge count
                 and node count.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "46",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

%%% In the next title, ``Internet of Things'' is braced as one unit so
%%% that sentence-casing styles keep the capital T of the proper term.
@Article{Nesa:2018:IIG,
  author =       "Nashreen Nesa and Tania Ghosh and Indrajit Banerjee",
  title =        "{iGRM}: Improved Grey Relational Model and Its
                 Ensembles for Occupancy Sensing in {Internet of Things}
                 Applications",
  journal =      j-TKDD,
  volume =       "12",
  number =       "4",
  pages =        "47:1--47:??",
  month =        jul,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3186268",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:46 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Occupancy detection is one of the many applications of
                 Building Automation Systems (BAS) or Heating,
                 Ventilation, and Air Conditioning (HVAC) control
                 systems, especially, with the rising demand of Internet
                 of Things (IoT) services. This article describes the
                 fusion of data collected from sensors by exploiting
                 their potential to sense occupancy in a room. For this
                 purpose, a sensor test bed is deployed that includes
                 four sensors measuring temperature, relative humidity,
                 distance from the first obstacle, and light along with
                 a Arduino micro-controller to validate our model. In
                 addition, this article proposes three algorithms for
                 efficient fusion of the sensor data that is inspired by
                 the Grey theory.
An improved Grey Relational Model (iGRM) is proposed, which acts as the base classifier for the other two algorithms, namely, Grey Relational Model with Bagging (iGRM-BG) and Grey Relational Model with Boosting (iGRM-BT). Furthermore, all three algorithms use a sliding window concept, where only the samples inside the window participate in model training. Also, we have considered varying number of window size for optimal comparison. The algorithms were tested against the experimental data collected through a test bed as well as on a publicly available large dataset, where both the ensemble models, iGRM-BG and iGRM-BT, are seen to enhance the performance of iGRM. The results reveal exceptionally high performances with accuracies above 95\% (iGRM) and up to 100\% (iGRM-BT) for the experimental dataset and above 98.24\% (iGRM) and up to 99.49\% (iGRM-BG) using the publicly available dataset. Among the three proposed models, iGRM-BG was observed to outperform both iGRM and iGRM-BT owing to its advantage of being an ensemble model and its robustness against over-fitting.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "47", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Bressan:2018:MCB, author = "Marco Bressan and Flavio Chierichetti and Ravi Kumar and Stefano Leucci and Alessandro Panconesi", title = "{Motif} Counting Beyond Five Nodes", journal = j-TKDD, volume = "12", number = "4", pages = "48:1--48:??", month = jul, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3186586", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:46 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Counting graphlets is a well-studied problem in graph mining and social network analysis. 
Recently, several papers explored very simple and natural algorithms based on Monte Carlo sampling of Markov Chains (MC), and reported encouraging results. We show, perhaps surprisingly, that such algorithms are outperformed by color coding (CC) [2], a sophisticated algorithmic technique that we extend to the case of graphlet sampling and for which we prove strong statistical guarantees. Our computational experiments on graphs with millions of nodes show CC to be more accurate than MC; furthermore, we formally show that the mixing time of the MC approach is too high in general, even when the input graph has high conductance. All this comes at a price however. While MC is very efficient in terms of space, CC's memory requirements become demanding when the size of the input graph and that of the graphlets grow. And yet, our experiments show that CC can push the limits of the state-of-the-art, both in terms of the size of the input graph and of that of the graphlets.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "48", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Nguyen:2018:EUP, author = "Minh-Tien Nguyen and Duc-Vu Tran and Le-Minh Nguyen and Xuan-Hieu Phan", title = "Exploiting User Posts for {Web} Document Summarization", journal = j-TKDD, volume = "12", number = "4", pages = "49:1--49:??", month = jul, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3186566", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:46 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Relevant user posts such as comments or tweets of a Web document provide additional valuable information to enrich the content of this document. When creating user posts, readers tend to borrow salient words or phrases in sentences. This can be considered as word variation. 
This article proposes a framework that models the word variation aspect to enhance the quality of Web document summarization. Technically, the framework consists of two steps: scoring and selection. In the first step, the social information of a Web document such as user posts is exploited to model intra-relations and inter-relations in lexical and semantic levels. These relations are denoted by a mutual reinforcement similarity graph used to score each sentence and user post. After scoring, summaries are extracted by using a ranking approach or concept-based method formulated in the form of Integer Linear Programming. To confirm the efficiency of our framework, sentence and story highlight extraction tasks were taken as a case study on three datasets in two languages, English and Vietnamese. Experimental results show that: (i) the framework can improve ROUGE-scores compared to state-of-the-art baselines of social context summarization and (ii) the combination of the two relations benefits the sentence extraction of single Web documents.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "49", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2018:ERC, author = "Bo Li and Yevgeniy Vorobeychik", title = "Evasion-Robust Classification on Binary Domains", journal = j-TKDD, volume = "12", number = "4", pages = "50:1--50:??", month = jul, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3186282", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:46 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "The success of classification learning has led to numerous attempts to apply it in adversarial settings such as spam and malware detection. 
The core challenge in this class of applications is that adversaries are not static, but make a deliberate effort to evade the classifiers. We investigate both the problem of modeling the objectives of such adversaries, as well as the algorithmic problem of accounting for rational, objective-driven adversaries. We first present a general approach based on mixed-integer linear programming (MILP) with constraint generation. This approach is the first to compute an optimal solution to adversarial loss minimization for two general classes of adversarial evasion models in the context of binary feature spaces. To further improve scalability and significantly generalize the scope of the MILP-based method, we propose a principled iterative retraining framework, which can be used with arbitrary classifiers and essentially arbitrary attack models. We show that the retraining approach, when it converges, minimizes an upper bound on adversarial loss. Extensive experiments demonstrate that the mixed-integer programming approach significantly outperforms several state-of-the-art adversarial learning alternatives. Moreover, the retraining framework performs nearly as well, but scales significantly better. Finally, we show that our approach is robust to misspecifications of the adversarial model.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "50", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Mohammadi:2018:COA, author = "Majid Mohammadi and Amir Ahooye Atashin and Wout Hofman and Yaohua Tan", title = "Comparison of Ontology Alignment Systems Across Single Matching Task Via the {McNemar's} Test", journal = j-TKDD, volume = "12", number = "4", pages = "51:1--51:??", month = jul, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3193573", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:46 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Ontology alignment is widely used to find the correspondences between different ontologies in diverse fields. After discovering the alignments, several performance scores are available to evaluate them. The scores typically require the identified alignment and a reference containing the underlying actual correspondences of the given ontologies. The current trend in the alignment evaluation is to put forward a new score (e.g., precision, weighted precision, semantic precision, etc.) and to compare various alignments by juxtaposing the obtained scores. However, it is substantially provocative to select one measure among others for comparison. On top of that, claiming if one system has a better performance than one another cannot be substantiated solely by comparing two scalars. In this article, we propose the statistical procedures that enable us to theoretically favor one system over one another. The McNemar's test is the statistical means by which the comparison of two ontology alignment systems over one matching task is drawn. The test applies to a 2 $ \times $ 2 contingency table, which can be constructed in two different ways based on the alignments, each of which has their own merits/pitfalls. 
The ways of the contingency table construction and various apposite statistics from the McNemar's test are elaborated in minute detail. In the case of having more than two alignment systems for comparison, the family wise error rate is expected to happen. Thus, the ways of preventing such an error are also discussed. A directed graph visualizes the outcome of the McNemar's test in the presence of multiple alignment systems. From this graph, it is readily understood if one system is better than one another or if their differences are imperceptible. The proposed statistical methodologies are applied to the systems participated in the OAEI 2016 anatomy track, and also compares several well-known similarity metrics for the same matching problem.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "51", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Lines:2018:TSC, author = "Jason Lines and Sarah Taylor and Anthony Bagnall", title = "Time Series Classification with {HIVE}-{COTE}: The Hierarchical Vote Collective of Transformation-Based Ensembles", journal = j-TKDD, volume = "12", number = "5", pages = "52:1--52:??", month = jul, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3182382", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:46 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "A recent experimental evaluation assessed 19 time series classification (TSC) algorithms and found that one was significantly more accurate than all others: the Flat Collective of Transformation-based Ensembles (Flat-COTE). Flat-COTE is an ensemble that combines 35 classifiers over four data representations. However, while comprehensive, the evaluation did not consider deep learning approaches. 
Convolutional neural networks (CNN) have seen a surge in popularity and are now state of the art in many fields and raises the question of whether CNNs could be equally transformative for TSC. We implement a benchmark CNN for TSC using a common structure and use results from a TSC-specific CNN from the literature. We compare both to Flat-COTE and find that the collective is significantly more accurate than both CNNs. These results are impressive, but Flat-COTE is not without deficiencies. We significantly improve the collective by proposing a new hierarchical structure with probabilistic voting, defining and including two novel ensemble classifiers built in existing feature spaces, and adding further modules to represent two additional transformation domains. The resulting classifier, the Hierarchical Vote Collective of Transformation-based Ensembles (HIVE-COTE), encapsulates classifiers built on five data representations. We demonstrate that HIVE-COTE is significantly more accurate than Flat-COTE (and all other TSC algorithms that we are aware of) over 100 resamples of 85 TSC problems and is the new state of the art for TSC. Further analysis is included through the introduction and evaluation of 3 new case studies and extensive experimentation on 1,000 simulated datasets of 5 different types.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "52", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Amornbunchornvej:2018:CED, author = "Chainarong Amornbunchornvej and Ivan Brugere and Ariana Strandburg-Peshkin and Damien R. Farine and Margaret C. Crofoot and Tanya Y. 
Berger-Wolf", title = "Coordination Event Detection and Initiator Identification in Time Series Data", journal = j-TKDD, volume = "12", number = "5", pages = "53:1--53:??", month = jul, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3201406", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:46 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Behavior initiation is a form of leadership and is an important aspect of social organization that affects the processes of group formation, dynamics, and decision-making in human societies and other social animal species. In this work, we formalize the Coordination Initiator Inference Problem and propose a simple yet powerful framework for extracting periods of coordinated activity and determining individuals who initiated this coordination, based solely on the activity of individuals within a group during those periods. The proposed approach, given arbitrary individual time series, automatically (1) identifies times of coordinated group activity, (2) determines the identities of initiators of those activities, and (3) classifies the likely mechanism by which the group coordination occurred, all of which are novel computational tasks. We demonstrate our framework on both simulated and real-world data: trajectories tracking of animals as well as stock market data. Our method is competitive with existing global leadership inference methods but provides the first approaches for local leadership and coordination mechanism classification. Our results are consistent with ground-truthed biological data and the framework finds many known events in financial data which are not otherwise reflected in the aggregate NASDAQ index. Our method is easily generalizable to any coordinated time series data from interacting entities.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "53", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2018:ESC, author = "Peipei Li and Haixun Wang and Hongsong Li and Xindong Wu", title = "Employing Semantic Context for Sparse Information Extraction Assessment", journal = j-TKDD, volume = "12", number = "5", pages = "54:1--54:??", month = jul, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3201407", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:46 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "A huge amount of texts available on the World Wide Web presents an unprecedented opportunity for information extraction (IE). One important assumption in IE is that frequent extractions are more likely to be correct. Sparse IE is hence a challenging task because no matter how big a corpus is, there are extractions supported by only a small amount of evidence in the corpus. However, there is limited research on sparse IE, especially in the assessment of the validity of sparse IEs. Motivated by this, we introduce a lightweight, explicit semantic approach for assessing sparse IE.$^1$ We first use a large semantic network consisting of millions of concepts, entities, and attributes to explicitly model the context of any semantic relationship. Second, we learn from three semantic contexts using different base classifiers to select an optimal classification model for assessing sparse extractions. Finally, experiments show that as compared with several state-of-the-art approaches, our approach can significantly improve the $F$-score in the assessment of sparse extractions while maintaining the efficiency.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "54", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Bollegala:2018:CPM, author = "Danushka Bollegala and Vincent Atanasov and Takanori Maehara and Ken-Ichi Kawarabayashi", title = "{ClassiNet} --- Predicting Missing Features for Short-Text Classification", journal = j-TKDD, volume = "12", number = "5", pages = "55:1--55:??", month = jul, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3201578", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:46 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Short and sparse texts such as tweets, search engine snippets, product reviews, and chat messages are abundant on the Web. Classifying such short-texts into a pre-defined set of categories is a common problem that arises in various contexts, such as sentiment classification, spam detection, and information recommendation. The fundamental problem in short-text classification is feature sparseness --- the lack of feature overlap between a trained model and a test instance to be classified. We propose ClassiNet --- a network of classifiers trained for predicting missing features in a given instance, to overcome the feature sparseness problem. Using a set of unlabeled training instances, we first learn binary classifiers as feature predictors for predicting whether a particular feature occurs in a given instance. Next, each feature predictor is represented as a vertex $v_i$ in the ClassiNet, where a one-to-one correspondence exists between feature predictors and vertices. The weight of the directed edge $e_{ij}$ connecting a vertex $v_i$ to a vertex $v_j$ represents the conditional probability that given $v_i$ exists in an instance, $v_j$ also exists in the same instance.
We show that ClassiNets generalize word co-occurrence graphs by considering implicit co-occurrences between features. We extract numerous features from the trained ClassiNet to overcome feature sparseness. In particular, for a given instance x, we find similar features from ClassiNet that did not appear in x, and append those features in the representation of x. Moreover, we propose a method based on graph propagation to find features that are indirectly related to a given short-text. We evaluate ClassiNets on several benchmark datasets for short-text classification. Our experimental results show that by using ClassiNet, we can statistically significantly improve the accuracy in short-text classification tasks, without having to use any external resources such as thesauri for finding related features.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "55", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Qin:2018:STR, author = "Tian Qin and Wufan Shangguan and Guojie Song and Jie Tang", title = "Spatio-Temporal Routine Mining on Mobile Phone Data", journal = j-TKDD, volume = "12", number = "5", pages = "56:1--56:??", month = jul, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3201577", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:46 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Mining human behaviors has always been an important subarea of Data Mining. While it provides empirical evidences to psychological/behavioral studies, it also builds the foundation of various big-data systems, which rely heavily on the prediction of human behaviors. 
In recent years, the ubiquitous spreading of mobile phones and the massive amount of spatio-temporal data collected from them make it possible to keep track of the daily commute behaviors of mobile subscribers and further conduct routine mining on them. In this article, we propose to model mobile subscribers' daily commute behaviors by three levels: location trajectory, one-day pattern, and routine pattern. We develop the model Spatio-Temporal Routine Mining Model (STRMM) to characterize the generative process between these three levels. From daily trajectories, the STRMM model unsupervisedly extracts spatio-temporal routine patterns that contain two aspects of information: (1) How people's typical commute patterns are. (2) How much their commute behaviors vary from day to day. Compared to traditional methods, STRMM takes into account the different degrees of behavioral uncertainty in different timespans of a day, yielding more realistic and intuitive results. To learn model parameters, we adopt Stochastic Expectation Maximization algorithm. Experiments are conducted on two real world datasets, and the empirical results show that the STRMM model can effectively discover hidden routine patterns of human commute behaviors and yields higher accuracy results in trajectory prediction task.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "56", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2018:SRI, author = "Ziqi Zhang and Jie Gao and Fabio Ciravegna", title = "{SemRe-Rank}: Improving Automatic Term Extraction by Incorporating Semantic Relatedness with Personalised {PageRank}", journal = j-TKDD, volume = "12", number = "5", pages = "57:1--57:??", month = jul, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3201408", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:46 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pagerank.bib; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Automatic Term Extraction (ATE) deals with the extraction of terminology from a domain specific corpus, and has long been an established research area in data and knowledge acquisition. ATE remains a challenging task as it is known that there is no existing ATE methods that can consistently outperform others in any domain. This work adopts a refreshed perspective to this problem: instead of searching for such a `one-size-fit-all' solution that may never exist, we propose to develop generic methods to `enhance' existing ATE methods. We introduce SemRe-Rank, the first method based on this principle, to incorporate semantic relatedness --- an often overlooked venue --- into an existing ATE method to further improve its performance. SemRe-Rank incorporates word embeddings into a personalised PageRank process to compute `semantic importance' scores for candidate terms from a graph of semantically related words (nodes), which are then used to revise the scores of candidate terms computed by a base ATE algorithm.
Extensively evaluated with 13 state-of-the-art base ATE methods on four datasets of diverse nature, it is shown to have achieved widespread improvement over all base methods and across all datasets, with up to 15 percentage points when measured by the Precision in the top ranked K candidate terms (the average for a set of K's), or up to 28 percentage points in F1 measured at a K that equals to the expected real terms in the candidates (F1 in short). Compared to an alternative approach built on the well-known TextRank algorithm, SemRe-Rank can potentially outperform by up to 8 points in Precision at top K, or up to 17 points in F1.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "57", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Hao:2018:OAL, author = "Shuji Hao and Peiying Hu and Peilin Zhao and Steven C. H. Hoi and Chunyan Miao", title = "Online Active Learning with Expert Advice", journal = j-TKDD, volume = "12", number = "5", pages = "58:1--58:??", month = jul, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3201604", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:46 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "In literature, learning with expert advice methods usually assume that a learner always obtain the true label of every incoming training instance at the end of each trial. However, in many real-world applications, acquiring the true labels of all instances can be both costly and time consuming, especially for large-scale problems. For example, in the social media, data stream usually comes in a high speed and volume, and it is nearly impossible and highly costly to label all of the instances.
In this article, we address this problem with active learning with expert advice, where the ground truth of an instance is disclosed only when it is requested by the proposed active query strategies. Our goal is to minimize the number of requests while training an online learning model without sacrificing the performance. To address this challenge, we propose a framework of active forecasters, which attempts to extend two fully supervised forecasters, Exponentially Weighted Average Forecaster and Greedy Forecaster, to tackle the task of online active learning (OAL) with expert advice. Specifically, we proposed two OAL with expert advice algorithms, named Active Exponentially Weighted Average Forecaster (AEWAF) and active greedy forecaster (AGF), by considering the difference of expert advices. To further improve the robustness of the proposed AEWAF and AGF algorithms in the noisy scenarios (where noisy experts exist), we also proposed two robust active learning with expert advice algorithms, named Robust Active Exponentially Weighted Average Forecaster and Robust Active Greedy Forecaster. We validate the efficacy of the proposed algorithms by an extensive set of experiments in both normal scenarios (where all of experts are comparably reliable) and noisy scenarios.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "58", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Silva:2018:DMA, author = "Fabr{\'\i}cio A. Silva and Augusto C. S. A. Domingues and Thais R. M. 
Braga Silva", title = "Discovering Mobile Application Usage Patterns from a Large-Scale Dataset", journal = j-TKDD, volume = "12", number = "5", pages = "59:1--59:??", month = jul, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3209669", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:46 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "The discovering of patterns regarding how, when, and where users interact with mobile applications reveals important insights for mobile service providers. In this work, we exploit for the first time a real and large-scale dataset representing the records of mobile application usage of 5,342 users during 2014. The data was collected by a software agent, installed at the users' smartphones, which monitors detailed usage of applications. First, we look for general patterns of how users access some of the most popular mobile applications in terms of frequency, duration, diversity, and data traffic. Next, we mine the dataset looking for temporal patterns in terms of when and how often accesses occur. Finally, we exploit the location of each access to detect users' points of interest and location-based communities. Based on the results, we derive a model to generate synthetic datasets of mobile application usage and evaluate solutions to predict the next application to be launched. We also discuss a series of implications of the findings regarding telecommunication services, mobile advertisements, and smart cities. This is the first time this dataset is used, and we also make it publicly available for other researchers.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "59", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2018:CQE, author = "Feijiang Li and Yuhua Qian and Jieting Wang and Chuangyin Dang and Bing Liu", title = "{Cluster}'s Quality Evaluation and Selective Clustering Ensemble", journal = j-TKDD, volume = "12", number = "5", pages = "60:1--60:??", month = jul, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3211872", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:46 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Clustering ensemble has drawn much attention in recent years due to its ability to generate a high quality and robust partition result. Weighted clustering ensemble and selective clustering ensemble are two general ways to further improve the performance of a clustering ensemble method. Existing weighted clustering ensemble methods assign the same weight to each cluster in a partition of the ensemble. Since the qualities of the clusters in a partition are different, the clusters should be weighted differently. To address this issue, this article proposes a new measure to calculate the similarity between a cluster and a partition. Theoretically, this measure is effective in handling two problems in measuring the quality of a cluster, which are defined as the symmetric problem and the context meaning problem. In addition, some properties of the proposed measure are analyzed. This measure can be easily expanded to a clustering performance measure that calculates the similarity between two partitions. As a result of this measure, we propose a novel selective clustering ensemble framework, which considers the differences between the objective of the ensemble selection stage and the object of the ensemble integration stage in the selective clustering ensemble. 
To verify the performance of the new measure, we compare the performance of the measure with the two existing measures in weighting clusters. The experiments show that the proposed measure is more effective. To verify the performance of the novel framework, four existing state-of-the-art selective clustering ensemble frameworks are employed as references. The experiments show that the proposed framework is statistically better than the others on 17 UCI benchmark datasets, 8 document datasets, and the Olivetti Face Database.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "60", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Riondato:2018:AAB, author = "Matteo Riondato and Eli Upfal", title = "{ABRA}: Approximating Betweenness Centrality in Static and Dynamic Graphs with {Rademacher} Averages", journal = j-TKDD, volume = "12", number = "5", pages = "61:1--61:??", month = jul, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3208351", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:46 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "ABPA$ \Xi $A$ \Sigma $ (ABRAXAS): Gnostic word of mystic meaning. We present ABRA, a suite of algorithms to compute and maintain probabilistically guaranteed high-quality approximations of the betweenness centrality of all nodes (or edges) on both static and fully dynamic graphs. Our algorithms use progressive random sampling and their analysis rely on Rademacher averages and pseudodimension, fundamental concepts from statistical learning theory. To our knowledge, ABRA is the first application of these concepts to the field of graph analysis. 
Our experimental results show that ABRA is much faster than exact methods, and vastly outperforms, in runtime, number of samples, and accuracy, state-of-the-art algorithms with the same quality guarantees.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "61", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{DosSantos:2018:RLC, author = "Ludovic {Dos Santos} and Benjamin Piwowarski and Ludovic Denoyer and Patrick Gallinari", title = "Representation Learning for Classification in Heterogeneous Graphs with Application to Social Networks", journal = j-TKDD, volume = "12", number = "5", pages = "62:1--62:??", month = jul, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3201603", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:46 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "We address the task of node classification in heterogeneous networks, where the nodes are of different types, each type having its own set of labels, and the relations between nodes may also be of different types. A typical example is provided by social networks where node types may for example be users, content, or films, and relations friendship, like, authorship. Learning and performing inference on such heterogeneous networks is a recent task requiring new models and algorithms. We propose a model, Labeling Heterogeneous Network (LaHNet), a transductive approach to classification that learns to project the different types of nodes into a common latent space. This embedding is learned so as to reflect different characteristics of the problem such as the correlation between node labels, as well as the graph topology. The application focus is on social graphs, but the algorithm is general and can be used for other domains.
The model is evaluated on five datasets representative of different instances of social data.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "62", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2018:CCE, author = "Can Wang and Chi-Hung Chi and Zhong She and Longbing Cao and Bela Stantic", title = "Coupled Clustering Ensemble by Exploring Data Interdependence", journal = j-TKDD, volume = "12", number = "6", pages = "63:1--63:??", month = oct, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3230967", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:49 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Clustering ensembles combine multiple partitions of data into a single clustering solution. It is an effective technique for improving the quality of clustering results. Current clustering ensemble algorithms are usually built on the pairwise agreements between clusterings that focus on the similarity via consensus functions, between data objects that induce similarity measures from partitions and re-cluster objects, and between clusters that collapse groups of clusters into meta-clusters. In most of those models, there is a strong assumption on IIDness (i.e., independent and identical distribution), which states that base clusterings perform independently of one another and all objects are also independent. In the real world, however, objects are generally likely related to each other through features that are either explicit or even implicit. There is also latent but definite relationship among intermediate base clusterings because they are derived from the same set of data. All these demand a further investigation of clustering ensembles that explores the interdependence characteristics of data. 
To solve this problem, a new coupled clustering ensemble (CCE) framework that works on the interdependence nature of objects and intermediate base clusterings is proposed in this article. The main idea is to model the coupling relationship between objects by aggregating the similarity of base clusterings, and the interactive relationship among objects by addressing their neighborhood domains. Once these interdependence relationships are discovered, they will act as critical supplements to clustering ensembles. We verified our proposed framework by using three types of consensus function: clustering-based, object-based, and cluster-based. Substantial experiments on multiple synthetic and real-life benchmark datasets indicate that CCE can effectively capture the implicit interdependence relationships among base clusterings and among objects with higher clustering accuracy, stability, and robustness compared to 14 state-of-the-art techniques, supported by statistical analysis. In addition, we show that the final clustering quality is dependent on the data characteristics (e.g., quality and consistency) of base clusterings in terms of sensitivity analysis. Finally, the applications in document clustering, as well as on the datasets with much larger size and dimensionality, further demonstrate the effectiveness, efficiency, and scalability of our proposed models.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov.
Data", articleno = "63", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Huang:2018:EBQ, author = "Zhipeng Huang and Bogdan Cautis and Reynold Cheng and Yudian Zheng and Nikos Mamoulis and Jing Yan", title = "Entity-Based Query Recommendation for Long-Tail Queries", journal = j-TKDD, volume = "12", number = "6", pages = "64:1--64:??", month = oct, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3233186", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:49 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Query recommendation, which suggests related queries to search engine users, has attracted a lot of attention in recent years. Most of the existing solutions, which perform analysis of users' search history (or query logs), are often insufficient for long-tail queries that rarely appear in query logs. To handle such queries, we study the use of entities found in queries to provide recommendations. Specifically, we extract entities from a query, and use these entities to explore new ones by consulting an information source. The discovered entities are then used to suggest new queries to the user. In this article, we examine two information sources: (1) a knowledge base (or KB), such as YAGO and Freebase; and (2) a click log, which contains the URLs accessed by a query user. We study how to use these sources to find new entities useful for query recommendation. We further study a hybrid framework that integrates different query recommendation methods effectively. As shown in the experiments, our proposed approaches provide better recommendations than existing solutions for long-tail queries. In addition, our query recommendation process takes less than 100ms to complete.
Thus, our solution is suitable for providing online query recommendation services for search engines.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "64", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2018:MAD, author = "Xiaoli Liu and Peng Cao and Andr{\'e} R. Gon{\c{c}}alves and Dazhe Zhao and Arindam Banerjee", title = "Modeling {Alzheimer}'s Disease Progression with Fused {Laplacian} Sparse Group Lasso", journal = j-TKDD, volume = "12", number = "6", pages = "65:1--65:??", month = oct, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3230668", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:49 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Alzheimer's disease (AD), the most common type of dementia, not only imposes a huge financial burden on the health care system, but also a psychological and emotional burden on patients and their families. There is thus an urgent need to infer trajectories of cognitive performance over time and identify biomarkers predictive of the progression. In this article, we propose the multi-task learning with fused Laplacian sparse group lasso model, which can identify biomarkers closely related to cognitive measures due to its sparsity-inducing property, and model the disease progression with a general weighted (undirected) dependency graphs among the tasks. An efficient alternative directions method of multipliers based optimization algorithm is derived to solve the proposed non-smooth objective formulation. 
The effectiveness of the proposed model is demonstrated by its superior prediction performance over multiple state-of-the-art methods and accurate identification of compact sets of cognition-relevant imaging biomarkers that are consistent with prior medical studies.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "65", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{He:2018:SRI, author = "Xinran He and David Kempe", title = "Stability and Robustness in Influence Maximization", journal = j-TKDD, volume = "12", number = "6", pages = "66:1--66:??", month = oct, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3233227", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:49 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "In the well-studied Influence Maximization problem, the goal is to identify a set of k nodes in a social network whose joint influence on the network is maximized. A large body of recent work has justified research on Influence Maximization models and algorithms with their potential to create societal or economic value. However, in order to live up to this potential, the algorithms must be robust to large amounts of noise, for they require quantitative estimates of the influence, which individuals exert on each other; ground truth for such quantities is inaccessible, and even decent estimates are very difficult to obtain. We begin to address this concern formally. First, we exhibit simple inputs on which even very small estimation errors may mislead every algorithm into highly suboptimal solutions. Motivated by this observation, we propose the Perturbation Interval model as a framework to characterize the stability of Influence Maximization against noise in the inferred diffusion network. 
Analyzing the susceptibility of specific instances to estimation errors leads to a clean algorithmic question, which we term the Influence Difference Maximization problem. However, the objective function of Influence Difference Maximization is NP-hard to approximate within a factor of $ O(n^{(1 - \epsilon)}) $ for any $ \epsilon > 0 $. Given the infeasibility of diagnosing instability algorithmically, we focus on finding influential users robustly across multiple diffusion settings. We define a Robust Influence Maximization framework wherein an algorithm is presented with a set of influence functions. The algorithm's goal is to identify a set of k nodes who are simultaneously influential for all influence functions, compared to the (function-specific) optimum solutions. We show strong approximation hardness results for this problem unless the algorithm gets to select at least a logarithmic factor more seeds than the optimum solution. However, when enough extra seeds may be selected, we show that techniques of Krause et al. can be used to approximate the optimum robust influence to within a factor of $ 1 - 1 / e $. We evaluate this bicriteria approximation algorithm against natural heuristics on several real-world datasets. Our experiments indicate that the worst-case hardness does not necessarily translate into bad performance on real-world datasets; all algorithms perform fairly well.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "66", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Romero-Tris:2018:PPT, author = "Cristina Romero-Tris and David Meg{\'\i}as", title = "Protecting Privacy in Trajectories with a User-Centric Approach", journal = j-TKDD, volume = "12", number = "6", pages = "67:1--67:??", month = oct, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3233185", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:49 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "The increased use of location-aware devices, such as smartphones, generates a large amount of trajectory data. These data can be useful in several domains, like marketing, path modeling, localization of an epidemic focus, and so on. Nevertheless, since trajectory information contains personal mobility data, improper use or publication of trajectory data can threaten users' privacy. It may reveal sensitive details like habits of behavior, religious beliefs, and sexual preferences. Therefore, many users might be unwilling to share their trajectory data without a previous anonymization process. Currently, several proposals to address this problem can be found in the literature. These solutions focus on anonymizing data before its publication, i.e., when they are already stored in the server database. Nevertheless, we argue that this approach gives the user no control about the information she shares. For this reason, we propose anonymizing data in the users' mobile devices, before they are sent to a third party. This article extends our previous work which was, to the best of our knowledge, the first one to anonymize data at the client side, allowing users to select the amount and accuracy of shared data. 
In this article, we describe an improved version of the protocol, and we include the implementation together with an analysis of the results obtained after the simulation with real trajectory data.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "67", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ying:2018:FIG, author = "Josh Jia-Ching Ying and Ji Zhang and Che-Wei Huang and Kuan-Ta Chen and Vincent S. Tseng", title = "{FrauDetector+}: an Incremental Graph-Mining Approach for Efficient Fraudulent Phone Call Detection", journal = j-TKDD, volume = "12", number = "6", pages = "68:1--68:??", month = oct, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3234943", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:49 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "In recent years, telecommunication fraud has become more rampant internationally with the development of modern technology and global communication. Because of rapid growth in the volume of call logs, the task of fraudulent phone call detection is confronted with big data issues in real-world implementations. Although our previous work, FrauDetector, addressed this problem and achieved some promising results, it can be further enhanced because it focuses only on fraud detection accuracy, whereas the efficiency and scalability are not top priorities. Other known approaches for fraudulent call number detection suffer from long training times or cannot accurately detect fraudulent phone calls in real time. However, the learning process of FrauDetector is too time-consuming to support real-world application. 
Although we have attempted to accelerate the learning process of FrauDetector by parallelization, the parallelized learning process, namely PFrauDetector, still cannot afford the computing cost. In this article, we propose a highly efficient incremental graph-mining-based fraudulent phone call detection approach, namely FrauDetector$^+$, which can automatically label fraudulent phone numbers with a ``fraud'' tag, a crucial prerequisite for distinguishing fraudulent phone call numbers from nonfraudulent ones. FrauDetector$^+$ initially generates smaller, more manageable subnetworks from original graph and performs a parallelized weighted HITS algorithm for a significant speed increase in the graph learning module. It adopts a novel aggregation approach to generate a trust (or experience) value for each phone number (or user) based on their respective local values. After the initial procedure, we can incrementally update the trust (or experience) value for each phone number (or user) while a new fraud phone number is identified. An efficient fraud-centric hash structure is constructed to support fast real-time detection of fraudulent phone numbers in the detection module. We conduct a comprehensive experimental study based on real datasets collected through an antifraud mobile application called Whoscall. The results demonstrate a significantly improved efficiency of our approach compared with FrauDetector as well as superior performance against other major classifier-based methods.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov.
Data", articleno = "68", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Di:2018:LSA, author = "Mingyang Di and Diego Klabjan and Long Sha and Patrick Lucey", title = "Large-Scale Adversarial Sports Play Retrieval with Learning to Rank", journal = j-TKDD, volume = "12", number = "6", pages = "69:1--69:??", month = oct, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3230667", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:49 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "As teams of professional leagues are becoming more and more analytically driven, the interest in effective data management and access of sports plays has dramatically increased. In this article, we present a retrieval system that can quickly find the most relevant plays from historical games given an input query. To search through a large number of games at an interactive speed, our system is built upon a distributed framework so that each query-result pair is evaluated in parallel. We also propose a pairwise learning to rank approach to improve search ranking based on users' clickthrough behavior. The similarity metric in training the rank function is based on automatically learnt features from a convolutional autoencoder. Finally, we showcase the efficacy of our learning to rank approach by demonstrating rank quality in a user study.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "69", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Huang:2018:GEF, author = "Xiao Huang and Jundong Li and Na Zou and Xia Hu", title = "A General Embedding Framework for Heterogeneous Information Learning in Large-Scale Networks", journal = j-TKDD, volume = "12", number = "6", pages = "70:1--70:??", month = oct, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3241063", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:49 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Network analysis has been widely applied in many real-world tasks, such as gene analysis and targeted marketing. To extract effective features for these analysis tasks, network embedding automatically learns a low-dimensional vector representation for each node, such that the meaningful topological proximity is well preserved. While the embedding algorithms on pure topological structure have attracted considerable attention, in practice, nodes are often abundantly accompanied with other types of meaningful information, such as node attributes, second-order proximity, and link directionality. A general framework for incorporating the heterogeneous information into network embedding could be potentially helpful in learning better vector representations. However, it remains a challenging task to jointly embed the geometrical structure and a distinct type of information due to the heterogeneity. In addition, the real-world networks often contain a large number of nodes, which put demands on the scalability of the embedding algorithms. To bridge the gap, in this article, we propose a general embedding framework named Heterogeneous Information Learning in Large-scale networks (HILL) to accelerate the joint learning. 
It enables the simultaneous node proximity assessing process to be done in a distributed manner by decomposing the complex modeling and optimization into many simple and independent sub-problems. We validate the significant correlation between the heterogeneous information and topological structure, and illustrate the generalizability of HILL by applying it to perform attributed network embedding and second-order proximity learning. A variation is proposed for link directionality modeling. Experimental results on real-world networks demonstrate the effectiveness and efficiency of HILL.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "70", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Park:2018:ETS, author = "Ha-Myung Park and Francesco Silvestri and Rasmus Pagh and Chin-Wan Chung and Sung-Hyon Myaeng and U. Kang", title = "Enumerating Trillion Subgraphs On Distributed Systems", journal = j-TKDD, volume = "12", number = "6", pages = "71:1--71:??", month = oct, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3237191", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:49 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "How can we find patterns from an enormous graph with billions of vertices and edges? The subgraph enumeration, which is to find patterns from a graph, is an important task for graph data analysis with many applications, including analyzing the social network evolution, measuring the significance of motifs in biological networks, observing the dynamics of Internet, and so on. Especially, the triangle enumeration, a special case of the subgraph enumeration, where the pattern is a triangle, has many applications such as identifying suspicious users in social networks, detecting web spams, and finding communities. 
However, recent networks are so large that most of the previous algorithms fail to process them. Recently, several MapReduce algorithms have been proposed to address such large networks; however, they suffer from the massive shuffled data resulting in a very long processing time. In this article, we propose scalable methods for enumerating trillion subgraphs on distributed systems. We first propose PTE (Pre-partitioned Triangle Enumeration), a new distributed algorithm for enumerating triangles in enormous graphs by resolving the structural inefficiency of the previous MapReduce algorithms. PTE enumerates trillions of triangles in a billion scale graph by decreasing three factors: the amount of shuffled data, total work, and network read. We also propose PSE (Pre-partitioned Subgraph Enumeration), a generalized version of PTE for enumerating subgraphs that match an arbitrary query graph. Experimental results show that PTE provides 79 times faster performance than recent distributed algorithms on real-world graphs, and succeeds in enumerating more than 3 trillion triangles on the ClueWeb12 graph with 6.3 billion vertices and 72 billion edges. Furthermore, PSE successfully enumerates 265 trillion clique subgraphs with 4 vertices from a subdomain hyperlink network, showing 47 times faster performance than the state of the art distributed subgraph enumeration algorithm.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov.
Data", articleno = "71", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wen:2018:EAD, author = "Xidao Wen and Yu-Ru Lin and Konstantinos Pelechrinis", title = "Event Analytics via Discriminant Tensor Factorization", journal = j-TKDD, volume = "12", number = "6", pages = "72:1--72:??", month = oct, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3184455", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:49 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Analyzing the impact of disastrous events has been central to understanding and responding to crises. Traditionally, the assessment of disaster impact has primarily relied on the manual collection and analysis of surveys and questionnaires as well as the review of authority reports. This can be costly and time-consuming, whereas a timely assessment of an event's impact is critical for crisis management and humanitarian operations. In this work, we formulate the impact discovery as the problem to identify the shared and discriminative subspace via tensor factorization due to the multi-dimensional nature of mobility data. Existing work in mining the shared and discriminative subspaces typically requires the predefined number of either type of them. In the context of event impact discovery, this could be impractical, especially for those unprecedented events. To overcome this, we propose a new framework, called ``PairFac,'' that jointly factorizes the multi-dimensional data to discover the latent mobility pattern along with its associated discriminative weight. This framework does not require splitting the shared and discriminative subspaces in advance and at the same time automatically captures the persistent and changing patterns from multi-dimensional behavioral data. 
Our work has important applications in crisis management and urban planning, which provides a timely assessment of impacts of major events in the urban environment.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "72", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chen:2018:SSL, author = "Chaochao Chen and Kevin Chen-Chuan Chang and Qibing Li and Xiaolin Zheng", title = "Semi-supervised Learning Meets Factorization: Learning to Recommend with Chain Graph Model", journal = j-TKDD, volume = "12", number = "6", pages = "73:1--73:??", month = oct, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3264745", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:49 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Recently, latent factor model (LFM) has been drawing much attention in recommender systems due to its good performance and scalability. However, existing LFMs predict missing values in a user-item rating matrix only based on the known ones, and thus the sparsity of the rating matrix always limits their performance. Meanwhile, semi-supervised learning (SSL) provides an effective way to alleviate the label (i.e., rating) sparsity problem by performing label propagation, which is mainly based on the smoothness insight on affinity graphs. However, graph-based SSL suffers serious scalability and graph unreliable problems when directly being applied to do recommendation. In this article, we propose a novel probabilistic chain graph model (CGM) to marry SSL with LFM. The proposed CGM is a combination of Bayesian network and Markov random field. The Bayesian network is used to model the rating generation and regression procedures, and the Markov random field is used to model the confidence-aware smoothness constraint between the generated ratings. 
Experimental results show that our proposed CGM significantly outperforms the state-of-the-art approaches in terms of four evaluation metrics, and with a larger performance margin when data sparsity increases.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "73", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Siddiqui:2019:SFE, author = "Md Amran Siddiqui and Alan Fern and Thomas G. Dietterich and Weng-Keen Wong", title = "Sequential Feature Explanations for Anomaly Detection", journal = j-TKDD, volume = "13", number = "1", pages = "1:1--1:??", month = jan, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3230666", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:49 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3230666", abstract = "In many applications, an anomaly detection system presents the most anomalous data instance to a human analyst, who then must determine whether the instance is truly of interest (e.g., a threat in a security setting). Unfortunately, most anomaly detectors provide no explanation about why an instance was considered anomalous, leaving the analyst with no guidance about where to begin the investigation. To address this issue, we study the problems of computing and evaluating sequential feature explanations (SFEs) for anomaly detectors. An SFE of an anomaly is a sequence of features, which are presented to the analyst one at a time (in order) until the information contained in the highlighted features is enough for the analyst to make a confident judgement about the anomaly. Since analyst effort is related to the amount of information that they consider in an investigation, an explanation's quality is related to the number of features that must be revealed to attain confidence. 
In this article, we first formulate the problem of optimizing SFEs for a particular density-based anomaly detector. We then present both greedy algorithms and an optimal algorithm, based on branch-and-bound search, for optimizing SFEs. Finally, we provide a large scale quantitative evaluation of these algorithms using a novel framework for evaluating explanations. The results show that our algorithms are quite effective and that our best greedy algorithm is competitive with optimal solutions.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "1", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2019:DDS, author = "Xiaoming Liu and Chao Shen and Xiaohong Guan and Yadong Zhou", title = "Digger: Detect Similar Groups in Heterogeneous Social Networks", journal = j-TKDD, volume = "13", number = "1", pages = "2:1--2:??", month = jan, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3267106", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:49 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3267106", abstract = "People participate in multiple online social networks, e.g., Facebook, Twitter, and Linkedin, and these social networks with heterogeneous social content and user relationship are named as heterogeneous social networks. Group structure widely exists in heterogeneous social networks, which reveals the evolution of human cooperation. Detecting similar groups in heterogeneous networks has a great significance for many applications, such as recommendation system and spammer detection, using the wealth of group information. Although promising, this novel problem encounters a variety of technical challenges, including incomplete data, high time complexity, and ground truth. 
To address the research gap and technical challenges, we take advantage of a ratio-cut optimization function to model this novel problem by the linear mixed-effects method and graph spectral theory. Based on this model, we propose an efficient algorithm called Digger to detect the similar groups in the large graphs. Digger consists of three steps, including measuring user similarity, construct a matching graph, and detecting similar groups. We adopt several strategies to lower the computational cost and detail the basis of labeling the ground truth. We evaluate the effectiveness and efficiency of our algorithm on five different types of online social networks. The extensive experiments show that our method achieves 0.693, 0.783, and 0.735 in precision, recall, and F1-measure, which significantly surpass the state-of-arts by 24.4\%, 15.3\%, and 20.7\%, respectively. The results demonstrate that our proposal can detect similar groups in heterogeneous networks effectively.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "2", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Lagree:2019:AOI, author = "Paul Lagr{\'e}e and Olivier Capp{\'e} and Bogdan Cautis and Silviu Maniu", title = "Algorithms for Online Influencer Marketing", journal = j-TKDD, volume = "13", number = "1", pages = "3:1--3:??", month = jan, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3274670", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:49 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3274670", abstract = "Influence maximization is the problem of finding influential users, or nodes, in a graph so as to maximize the spread of information. It has many applications in advertising and marketing on social networks. 
In this article, we study a highly generic version of influence maximization, one of optimizing influence campaigns by sequentially selecting ``spread seeds'' from a set of influencers, a small subset of the node population, under the hypothesis that, in a given campaign, previously activated nodes remain persistently active. This problem is in particular relevant for an important form of online marketing, known as influencer marketing, in which the marketers target a sub-population of influential people, instead of the entire base of potential buyers. Importantly, we make no assumptions on the underlying diffusion model, and we work in a setting where neither a diffusion network nor historical activation data are available. We call this problem online influencer marketing with persistence (in short, OIMP). We first discuss motivating scenarios and present our general approach. We introduce an estimator on the influencers' remaining potential --- the expected number of nodes that can still be reached from a given influencer --- and justify its strength to rapidly estimate the desired value, relying on real data gathered from Twitter. We then describe a novel algorithm, GT-UCB, relying on probabilistic upper confidence bounds on the remaining potential. We show that our approach leads to high-quality spreads on both simulated and real datasets. Importantly, it is orders of magnitude faster than state-of-the-art influence maximization methods, making it possible to deal with large-scale online scenarios.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "3", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Tao:2019:RSE, author = "Zhiqiang Tao and Hongfu Liu and Sheng Li and Zhengming Ding and Yun Fu", title = "Robust Spectral Ensemble Clustering via Rank Minimization", journal = j-TKDD, volume = "13", number = "1", pages = "4:1--4:??", month = jan, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3278606", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:49 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3278606", abstract = "Ensemble Clustering (EC) is an important topic for data cluster analysis. It targets to integrate multiple Basic Partitions (BPs) of a particular dataset into a consensus partition. Among previous works, one promising and effective way is to transform EC as a graph partitioning problem on the co-association matrix, which is a pair-wise similarity matrix summarized by all the BPs in essence. However, most existing EC methods directly utilize the co-association matrix, yet without considering various noises (e.g., the disagreement between different BPs and the outliers) that may exist in it. These noises can impair the cluster structure of a co-association matrix, and thus mislead the final graph partitioning process. To address this challenge, we propose a novel Robust Spectral Ensemble Clustering (RSEC) algorithm in this article. Specifically, we learn low-rank representation (LRR) for the co-association matrix to uncover its cluster structure and handle the noises, and meanwhile, we perform spectral clustering with the learned representation to seek for a consensus partition. These two steps are jointly proceeded within a unified optimization framework. 
In particular, during the optimizing process, we leverage consensus partition to iteratively enhance the block-diagonal structure of LRR, in order to assist the graph partitioning. To solve RSEC, we first formulate it by using nuclear norm as a convex proxy to the rank function. Then, motivated by the recent advances in non-convex rank minimization, we further develop a non-convex model for RSEC and provide it a solution by the majorization--minimization Augmented Lagrange Multiplier algorithm. Experiments on 18 real-world datasets demonstrate the effectiveness of our algorithm compared with state-of-the-art methods. Moreover, several impact factors on the clustering performance of our approach are also explored extensively.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "4", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Jaysawal:2019:PAP, author = "Bijay Prasad Jaysawal and Jen-Wei Huang", title = "{PSP}-{AMS}: Progressive Mining of Sequential Patterns Across Multiple Streams", journal = j-TKDD, volume = "13", number = "1", pages = "5:1--5:??", month = jan, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3281632", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:49 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3281632", abstract = "Sequential pattern mining is used to find frequent data sequences over time. When sequential patterns are generated, the newly arriving patterns may not be identified as frequent sequential patterns due to the existence of old data and sequences. Progressive sequential pattern mining aims to find the most up-to-date sequential patterns given that obsolete items will be deleted from the sequences. 
When sequences come with multiple data streams, it is difficult to maintain and update the current sequential patterns. Even worse, when we consider the sequences across multiple streams, previous methods cannot efficiently compute the frequent sequential patterns. In this work, we propose an efficient algorithm PSP-AMS to address this problem. PSP-AMS uses a novel data structure PSP-MS-tree to insert new items, update current items, and delete obsolete items. By maintaining a PSP-MS-tree, PSP-AMS efficiently finds the frequent sequential patterns across multiple streams. The experimental results show that PSP-AMS significantly outperforms previous algorithms for mining of progressive sequential patterns across multiple streams on synthetic data as well as real data.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "5", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Song:2019:TCA, author = "Qingquan Song and Hancheng Ge and James Caverlee and Xia Hu", title = "Tensor Completion Algorithms in Big Data Analytics", journal = j-TKDD, volume = "13", number = "1", pages = "6:1--6:??", month = jan, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3278607", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:49 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3278607", abstract = "Tensor completion is a problem of filling the missing or unobserved entries of partially observed tensors. Due to the multidimensional character of tensors in describing complex datasets, tensor completion algorithms and their applications have received wide attention and achievement in areas like data mining, computer vision, signal processing, and neuroscience. 
In this survey, we provide a modern overview of recent advances in tensor completion algorithms from the perspective of big data analytics characterized by diverse variety, large volume, and high velocity. We characterize these advances from the following four perspectives: general tensor completion algorithms, tensor completion with auxiliary information (variety), scalable tensor completion algorithms (volume), and dynamic tensor completion algorithms (velocity). Further, we identify several tensor completion applications on real-world data-driven problems and present some common experimental frameworks popularized in the literature along with several available software repositories. Our goal is to summarize these popular methods and introduce them to researchers and practitioners for promoting future research and applications. We conclude with a discussion of key challenges and promising research directions in this community for future exploration.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "6", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Moghaz:2019:TME, author = "Dror Moghaz and Yaakov Hacohen-Kerner and Dov Gabbay", title = "Text Mining for Evaluating Authors' Birth and Death Years", journal = j-TKDD, volume = "13", number = "1", pages = "7:1--7:??", month = jan, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3281631", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:49 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3281631", abstract = "This article presents a unique method in text and data mining for finding the era, i.e., mining temporal data, in which an anonymous author was living. 
Finding this era can assist in the examination of a fake document or extracting the time period in which a writer lived. The study and the experiments concern Hebrew, and in some parts, Aramaic and Yiddish rabbinic texts. The rabbinic texts are undated and contain no bibliographic sections, posing an interesting challenge. This work proposes algorithms using key phrases and key words that allow the temporal organization of citations together with linguistic patterns. Based on these key phrases, key words, and the references, we established several types of ``Iron-clad,'' Heuristic and Greedy rules for estimating the years of birth and death of a writer in an interesting classification task. Experiments were conducted on corpora, including documents authored by 12, 24, and 36 rabbinic writers and demonstrated promising results.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "7", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chen:2019:DRW, author = "Hung-Hsuan Chen and Pu Chen", title = "Differentiating Regularization Weights --- A Simple Mechanism to Alleviate Cold Start in Recommender Systems", journal = j-TKDD, volume = "13", number = "1", pages = "8:1--8:??", month = jan, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3285954", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:49 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3285954", abstract = "Matrix factorization (MF) and its extended methodologies have been studied extensively in the community of recommender systems in the last decade. Essentially, MF attempts to search for low-ranked matrices that can (1) best approximate the known rating scores, and (2) maintain low Frobenius norm for the low-ranked matrices to prevent overfitting. 
Since the two objectives conflict with each other, the common practice is to assign the relative importance weights as the hyper-parameters to these objectives. The two low-ranked matrices returned by MF are often interpreted as the latent factors of a user and the latent factors of an item that would affect the rating of the user on the item. As a result, it is typical that, in the loss function, we assign a regularization weight $ \lambda_p $ on the norms of the latent factors for all users, and another regularization weight $ \lambda_q $ on the norms of the latent factors for all the items. We argue that such a methodology probably over-simplifies the scenario. Alternatively, we probably should assign lower constraints to the latent factors associated with the items or users that reveal more information, and set higher constraints to the others. In this article, we systematically study this topic. We found that such a simple technique can improve the prediction results of the MF-based approaches based on several public datasets. Specifically, we applied the proposed methodology on three baseline models --- SVD, SVD++, and the NMF models. We found that this technique improves the prediction accuracy for all these baseline models. Perhaps more importantly, this technique better predicts the ratings on the long-tail items, i.e., the items that were rated/viewed/purchased by few users. This suggests that this approach may partially remedy the cold-start issue. The proposed method is very general and can be easily applied on various recommendation models, such as Factorization Machines, Field-aware Factorization Machines, Factorizing Personalized Markov Chains, Prod2Vec, Behavior2Vec, and so on. We release the code for reproducibility. We implemented a Python package that integrates the proposed regularization technique with the SVD, SVD++, and the NMF model. 
The package can be accessed at https://github.com/ncu-dart/rdf.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "8", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Sahoo:2019:LSO, author = "Doyen Sahoo and Steven C. H. Hoi and Bin Li", title = "Large Scale Online Multiple Kernel Regression with Application to Time-Series Prediction", journal = j-TKDD, volume = "13", number = "1", pages = "9:1--9:??", month = jan, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3299875", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:49 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Kernel-based regression represents an important family of learning techniques for solving challenging regression tasks with non-linear patterns. Despite being studied extensively, most of the existing work suffers from two major drawbacks as follows: (i) they are often designed for solving regression tasks in a batch learning setting, making them not only computationally inefficient but also poorly scalable in real-world applications where data arrives sequentially; and (ii) they usually assume that a fixed kernel function is given prior to the learning task, which could result in poor performance if the chosen kernel is inappropriate. To overcome these drawbacks, this work presents a novel scheme of Online Multiple Kernel Regression (OMKR), which sequentially learns the kernel-based regressor in an online and scalable fashion, and dynamically explores a pool of multiple diverse kernels to avoid suffering from a single fixed poor kernel so as to remedy the drawback of manual/heuristic kernel selection.
The OMKR problem is more challenging than regular kernel-based regression tasks since we have to on-the-fly determine both the optimal kernel-based regressor for each individual kernel and the best combination of the multiple kernel regressors. We propose a family of OMKR algorithms for regression and discuss their application to time series prediction tasks including application to AR, ARMA, and ARIMA time series. We develop novel approaches to make OMKR scalable for large datasets, to counter the problems arising from an unbounded number of support vectors. We also explore the effect of kernel combination at prediction level and at the representation level. Finally, we conduct extensive experiments to evaluate the empirical performance on both real-world regression and time series prediction tasks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "9", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Barton:2019:CIG, author = "Tomas Barton and Tomas Bruna and Pavel Kordik", title = "Chameleon 2: an Improved Graph-Based Clustering Algorithm", journal = j-TKDD, volume = "13", number = "1", pages = "10:1--10:??", month = jan, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3299876", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:49 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Traditional clustering algorithms fail to produce human-like results when confronted with data of variable density, complex distributions, or in the presence of noise. We propose an improved graph-based clustering algorithm called Chameleon 2, which overcomes several drawbacks of state-of-the-art clustering approaches. We modified the internal cluster quality measure and added an extra step to ensure algorithm robustness.
Our results reveal a significant positive impact on the clustering quality measured by Normalized Mutual Information on 32 artificial datasets used in the clustering literature. This significant improvement is also confirmed on real-world datasets. The performance of clustering algorithms such as DBSCAN is extremely parameter sensitive, and exhaustive manual parameter tuning is necessary to obtain a meaningful result. All hierarchical clustering methods are very sensitive to cutoff selection, and a human expert is often required to find the true cutoff for each clustering result. We present an automated cutoff selection method that enables the Chameleon 2 algorithm to generate high-quality clustering in autonomous mode.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "10", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Murai:2019:CDU, author = "Fabricio Murai and Bruno Ribeiro and Don Towsley and Pinghui Wang", title = "Characterizing Directed and Undirected Networks via Multidimensional Walks with Jumps", journal = j-TKDD, volume = "13", number = "1", pages = "11:1--11:??", month = jan, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3299877", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:49 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Estimating distributions of node characteristics (labels) such as number of connections or citizenship of users in a social network via edge and node sampling is a vital part of the study of complex networks. Due to its low cost, sampling via a random walk (RW) has been proposed as an attractive solution to this task. Most RW methods assume either that the network is undirected or that walkers can traverse edges regardless of their direction.
Some RW methods have been designed for directed networks where edges coming into a node are not directly observable. In this work, we propose Directed Unbiased Frontier Sampling (DUFS), a sampling method based on a large number of coordinated walkers, each starting from a node chosen uniformly at random. It applies to directed networks with invisible incoming edges because it constructs, in real time, an undirected graph consistent with the walkers' trajectories, and because of its use of random jumps to prevent walkers from being trapped. DUFS generalizes previous RW methods and is suited for undirected networks and to directed networks regardless of in-edge visibility. We also propose an improved estimator of node label distribution that combines information from initial walker locations with subsequent RW observations. We evaluate DUFS, compare it to other RW methods, investigate the impact of its parameters on estimation accuracy and provide practical guidelines for choosing them. In estimating out-degree distributions, DUFS yields significantly better estimates of the head of the distribution than other methods, while matching or exceeding estimation accuracy of the tail. Last, we show that DUFS outperforms uniform sampling when estimating distributions of node labels of the top 10\% largest degree nodes, even when sampling a node uniformly has the same cost as RW steps.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov.
Data", articleno = "11", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2019:DAA, author = "Huan Wang and Jia Wu and Wenbin Hu and Xindong Wu", title = "Detecting and Assessing Anomalous Evolutionary Behaviors of Nodes in Evolving Social Networks", journal = j-TKDD, volume = "13", number = "1", pages = "12:1--12:??", month = jan, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3299886", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:49 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Based on the performance of entire social networks, anomaly analysis for evolving social networks generally ignores the otherness of the evolutionary behaviors of different nodes, such that it is difficult to precisely identify the anomalous evolutionary behaviors of nodes (AEBN). Assuming that a node's evolutionary behavior that generates and removes edges normally follows stable evolutionary mechanisms, this study focuses on detecting and assessing AEBN, whose evolutionary mechanisms deviate from their past mechanisms, and proposes a link prediction detection (LPD) method and a matrix perturbation assessment (MPA) method. LPD describes a node's evolutionary behavior by fitting its evolutionary mechanism, and designs indexes for edge generation and removal to evaluate the extent to which the evolutionary mechanism of a node's evolutionary behavior can be fitted by a link prediction algorithm. Furthermore, it detects AEBN by quantifying the differences among behavior vectors that characterize the node's evolutionary behaviors in different periods. In addition, MPA considers AEBN as a perturbation of the social network structure, and quantifies the effect of AEBN on the social network structure based on matrix perturbation analysis.
Extensive experiments on eight disparate real-world networks demonstrate that analyzing AEBN from the perspective of evolutionary mechanisms is important and beneficial.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "12", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{vanLeeuwen:2019:ASI, author = "Matthijs van Leeuwen and Polo Chau and Jilles Vreeken and Dafna Shahaf and Christos Faloutsos", title = "Addendum to the Special Issue on {Interactive Data Exploration and Analytics (TKDD, Vol. 12, Iss. 1): Introduction by the Guest Editors}", journal = j-TKDD, volume = "13", number = "1", pages = "13:1--13:??", month = jan, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3298786", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jan 29 17:18:49 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "13", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Katib:2019:FAS, author = "Anas Katib and Praveen Rao and Kobus Barnard and Charles Kamhoua", title = "Fast Approximate Score Computation on Large-Scale Distributed Data for Learning Multinomial {Bayesian} Networks", journal = j-TKDD, volume = "13", number = "2", pages = "14:1--14:??", month = jun, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3301304", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3301304", abstract = "In this article, we focus on the problem of learning a Bayesian network over distributed data stored in a commodity cluster. 
Specifically, we address the challenge of computing the scoring function over distributed data in an efficient and scalable manner, which is a fundamental task during learning. While exact score computation can be done using the MapReduce-style computation, our goal is to compute approximate scores much faster with probabilistic error bounds and in a scalable manner. We propose a novel approach, which is designed to achieve the following: (a) decentralized score computation using the principle of gossiping; (b) lower resource consumption via a probabilistic approach for maintaining scores using the properties of a Markov chain; and (c) effective distribution of tasks during score computation (on large datasets) by synergistically combining well-known hashing techniques. We conduct theoretical analysis of our approach in terms of convergence speed of the statistics required for score computation, and memory and network bandwidth consumption. We also discuss how our approach is capable of efficiently recomputing scores when new data are available. We conducted a comprehensive evaluation of our approach and compared with the MapReduce-style computation using datasets of different characteristics on a 16-node cluster. When the MapReduce-style computation provided exact statistics for score computation, it was nearly 10 times slower than our approach. Although it ran faster on randomly sampled datasets than on the entire datasets, it performed worse than our approach in terms of accuracy. Our approach achieved high accuracy (below 6\% average relative error) in estimating the statistics for approximate score computation on all the tested datasets. In conclusion, it provides a feasible tradeoff between computation time and accuracy for fast approximate score computation on large-scale distributed data.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "14", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Gao:2019:TEM, author = "Xiaofeng Gao and Zhenhao Cao and Sha Li and Bin Yao and Guihai Chen and Shaojie Tang", title = "Taxonomy and Evaluation for Microblog Popularity Prediction", journal = j-TKDD, volume = "13", number = "2", pages = "15:1--15:??", month = jun, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3301303", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3301303", abstract = "As social networks become a major source of information, predicting the outcome of information diffusion has appeared intriguing to both researchers and practitioners. By organizing and categorizing the joint efforts of numerous studies on popularity prediction, this article presents a hierarchical taxonomy and helps to establish a systematic overview of popularity prediction methods for microblog. Specifically, we uncover three lines of thoughts: the feature-based approach, time-series modelling, and the collaborative filtering approach and analyse them, respectively. Furthermore, we also categorize prediction methods based on their underlying rationale: whether they attempt to model the motivation of users or monitor the early responses. Finally, we put these prediction methods to test by performing experiments on real-life data collected from popular social networks Twitter and Weibo. We compare the methods in terms of accuracy, efficiency, timeliness, robustness, and bias. As far as we are concerned, there is no precedented survey aimed at microblog popularity prediction at the time of submission. 
By establishing a taxonomy and evaluation for the first time, we hope to provide an in-depth review of state-of-the-art prediction methods and point out directions for further research. Our evaluations show that time-series modelling has the advantage of high accuracy and the ability to improve over time. The feature-based methods using only temporal features performs nearly as well as using all possible features, producing average results. This suggests that temporal features do have strong predictive power and that power is better exploited with time-series models. On the other hand, this implies that we know little about the future popularity of an item before it is posted, which may be the focus of further research.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "15", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yan:2019:RBT, author = "Ruidong Yan and Yi Li and Weili Wu and Deying Li and Yongcai Wang", title = "Rumor Blocking through Online Link Deletion on Social Networks", journal = j-TKDD, volume = "13", number = "2", pages = "16:1--16:??", month = jun, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3301302", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3301302", abstract = "In recent years, social networks have become important platforms for people to disseminate information. However, we need to take effective measures such as blocking a set of links to control the negative rumors spreading over the network. In this article, we propose a Rumor Spread Minimization (RSM) problem, i.e., we remove an edge set from network such that the rumor spread is minimized. We first prove the objective function of RSM problem is not submodular. 
Then, we propose both submodular lower-bound and upper-bound of the objective function. Next, we develop a heuristic algorithm to approximate the objective function. Furthermore, we reformulate our objective function as the DS function (the Difference of Submodular functions). Finally, we conduct experiments on real-world datasets to evaluate our proposed method. The experiment results show that the upper and lower bounds are very close, which indicates the good quality of them. And, the proposed method outperforms the comparison methods.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "16", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Teinemaa:2019:OOP, author = "Irene Teinemaa and Marlon Dumas and Marcello {La Rosa} and Fabrizio Maria Maggi", title = "Outcome-Oriented Predictive Process Monitoring: Review and Benchmark", journal = j-TKDD, volume = "13", number = "2", pages = "17:1--17:??", month = jun, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3301300", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3301300", abstract = "Predictive business process monitoring refers to the act of making predictions about the future state of ongoing cases of a business process, based on their incomplete execution traces and logs of historical (completed) traces. Motivated by the increasingly pervasive availability of fine-grained event data about business process executions, the problem of predictive process monitoring has received substantial attention in the past years. 
In particular, a considerable number of methods have been put forward to address the problem of outcome-oriented predictive process monitoring, which refers to classifying each ongoing case of a process according to a given set of possible categorical outcomes --- e.g., Will the customer complain or not? Will an order be delivered, canceled, or withdrawn? Unfortunately, different authors have used different datasets, experimental settings, evaluation measures, and baselines to assess their proposals, resulting in poor comparability and an unclear picture of the relative merits and applicability of different methods. To address this gap, this article presents a systematic review and taxonomy of outcome-oriented predictive process monitoring methods, and a comparative experimental evaluation of eleven representative methods using a benchmark covering 24 predictive process monitoring tasks based on nine real-life event logs.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "17", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ma:2019:PBD, author = "Liang Ma and Mudhakar Srivatsa and Derya Cansever and Xifeng Yan and Sue Kase and Michelle Vanni", title = "Performance Bounds of Decentralized Search in Expert Networks for Query Answering", journal = j-TKDD, volume = "13", number = "2", pages = "18:1--18:??", month = jun, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3300230", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3300230", abstract = "Expert networks are formed by a group of expert-professionals with different specialties to collaboratively resolve specific queries posted to the network. 
In such networks, when a query reaches an expert who does not have sufficient expertise, this query needs to be routed to other experts for further processing until it is completely solved; therefore, query answering efficiency is sensitive to the underlying query routing mechanism being used. Among all possible query routing mechanisms, decentralized search, operating purely on each expert's local information without any knowledge of network global structure, represents the most basic and scalable routing mechanism, which is applicable to any network scenarios even in dynamic networks. However, there is still a lack of fundamental understanding of the efficiency of decentralized search in expert networks. In this regard, we investigate decentralized search by quantifying its performance under a variety of network settings. Our key findings reveal the existence of network conditions, under which decentralized search can achieve significantly short query routing paths (i.e., between $ O(\log n) $ and $ O(\log^2 n) $ hops, $n$: total number of experts in the network). Based on such theoretical foundation, we further study how the unique properties of decentralized search in expert networks are related to the anecdotal small-world phenomenon. In addition, we demonstrate that decentralized search is robust against estimation errors introduced by misinterpreting the required expertise levels. The developed performance bounds, confirmed by real datasets, are able to assist in predicting network performance and designing complex expert networks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "18", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Jha:2019:DMD, author = "Kishlay Jha and Guangxu Xun and Vishrawas Gopalakrishnan and Aidong Zhang", title = "{DWE-Med}: Dynamic Word Embeddings for Medical Domain", journal = j-TKDD, volume = "13", number = "2", pages = "19:1--19:??", month = jun, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3310254", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3310254", abstract = "Recent advances in unsupervised language processing methods have created an opportunity to exploit massive text corpora for developing high-quality vector space representation (also known as word embeddings) of words. Towards this direction, practitioners have developed and applied several data driven embedding models with quite good rate of success. However, a drawback of these models lies in their premise of static context; wherein, the meaning of a word is assumed to remain the same over the period of time. This is limiting because it is known that the semantic meaning of a concept evolves over time. While such semantic drifts are routinely observed in almost all the domains; their effect is acute in domain such as biomedicine, where the semantic meaning of a concept changes relatively fast. To address this, in this study, we aim to learn temporally aware vector representation of medical concepts from the timestamped text data, and in doing so provide a systematic approach to formalize the problem. More specifically, a dynamic word embedding based model that jointly learns the temporal characteristics of medical concepts and performs across time-alignment is proposed. 
Apart from capturing the evolutionary characteristics in an optimal manner, the model also factors in the implicit medical properties useful for a variety of bio-medical applications. Empirical studies conducted on two important bio-medical use cases validate the effectiveness of the proposed approach and suggest that the model not only learns quality embeddings but also facilitates intuitive trajectory visualizations.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "19", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Cadena:2019:NOP, author = "Jose Cadena and Feng Chen and Anil Vullikanti", title = "Near-Optimal and Practical Algorithms for Graph Scan Statistics with Connectivity Constraints", journal = j-TKDD, volume = "13", number = "2", pages = "20:1--20:??", month = jun, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3309712", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3309712", abstract = "One fundamental task in network analysis is detecting ``hotspots'' or ``anomalies'' in the network; that is, detecting subgraphs where there is significantly more activity than one would expect given historical data or some baseline process. Scan statistics is one popular approach used for anomalous subgraph detection. This methodology involves maximizing a score function over all connected subgraphs, which is a challenging computational problem. A number of heuristics have been proposed for these problems, but they do not provide any quality guarantees. Here, we propose a framework for designing algorithms for optimizing a large class of scan statistics for networks, subject to connectivity constraints. 
Our algorithms run in time that scales linearly on the size of the graph and depends on a parameter we call the ``effective solution size,'' while providing rigorous approximation guarantees. In contrast, most prior methods have super-linear running times in terms of graph size. Extensive empirical evidence demonstrates the effectiveness and efficiency of our proposed algorithms in comparison with state-of-the-art methods. Our approach improves on the performance relative to all prior methods, giving up to over 25\% increase in the score. Further, our algorithms scale to networks with up to a million nodes, which is 1--2 orders of magnitude larger than all prior applications.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "20", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Jiang:2019:PFS, author = "Bingbing Jiang and Chang Li and Maarten {De Rijke} and Xin Yao and Huanhuan Chen", title = "Probabilistic Feature Selection and Classification Vector Machine", journal = j-TKDD, volume = "13", number = "2", pages = "21:1--21:??", month = jun, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3309541", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3309541", abstract = "Sparse Bayesian learning is a state-of-the-art supervised learning algorithm that can choose a subset of relevant samples from the input data and make reliable probabilistic predictions. However, in the presence of high-dimensional data with irrelevant features, traditional sparse Bayesian classifiers suffer from performance degradation and low efficiency due to the incapability of eliminating irrelevant features. 
To tackle this problem, we propose a novel sparse Bayesian embedded feature selection algorithm that adopts truncated Gaussian distributions as both sample and feature priors. The proposed algorithm, called probabilistic feature selection and classification vector machine (PFCVM$_{LP}$) is able to simultaneously select relevant features and samples for classification tasks. In order to derive the analytical solutions, Laplace approximation is applied to compute approximate posteriors and marginal likelihoods. Finally, parameters and hyperparameters are optimized by the type-II maximum likelihood method. Experiments on three datasets validate the performance of PFCVM$_{LP}$ along two dimensions: classification performance and effectiveness for feature selection. Finally, we analyze the generalization performance and derive a generalization error bound for PFCVM$_{LP}$. By tightening the bound, the importance of feature selection is demonstrated.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "21", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2019:FST, author = "Zheng Wang and Xiaojun Ye and Chaokun Wang and Philip S. Yu", title = "Feature Selection via Transferring Knowledge Across Different Classes", journal = j-TKDD, volume = "13", number = "2", pages = "22:1--22:??", month = jun, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3314202", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3314202", abstract = "The problem of feature selection has attracted considerable research interest in recent years. Supervised information is capable of significantly improving the quality of selected features. 
However, existing supervised feature selection methods all require that classes in the labeled data (source domain) and unlabeled data (target domain) be identical, which may be too restrictive in many cases. In this article, we consider a more challenging cross-class setting where the classes in these two domains are related but different, which has rarely been studied before. We propose a cross-class knowledge transfer feature selection framework which transfers the cross-class knowledge from the source domain to guide target domain feature selection. Specifically, high-level descriptions, i.e., attributes, are used as the bridge for knowledge transfer. To further improve the quality of the selected features, our framework jointly considers the tasks of cross-class knowledge transfer and feature selection. Experimental results on four benchmark datasets demonstrate the superiority of the proposed method.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "22", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Hong:2019:VGM, author = "Junyuan Hong and Yang Li and Huanhuan Chen", title = "Variant {Grassmann} Manifolds: a Representation Augmentation Method for Action Recognition", journal = j-TKDD, volume = "13", number = "2", pages = "23:1--23:??", month = jun, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3314203", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3314203", abstract = "In classification tasks, classifiers trained with finite examples might generalize poorly to new data with unknown variance. For this issue, data augmentation is a successful solution where numerous artificial examples are added to training sets. 
In this article, we focus on the data augmentation for improving the accuracy of action recognition, where action videos are modeled by linear dynamical systems and approximately represented as linear subspaces. These subspace representations lie in a non-Euclidean space, named Grassmann manifold, containing points as orthonormal matrixes. It is our concern that poor generalization may result from the variance of manifolds when data come from different sources or classes. Thus, we introduce infinitely many variant Grassmann manifolds (VGM) subject to a known distribution, then represent each action video as different Grassmann points leading to augmented representations. Furthermore, a prior based on the stability of subspace bases is introduced, so the manifold distribution can be adaptively determined, balancing discrimination and representation. Experimental results of multi-class and multi-source classification show that VGM softmax classifiers achieve lower test error rates compared to methods with a single manifold.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "23", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Guo:2019:LLS, author = "Yumeng Guo and Fulai Chung and Guozheng Li and Jiancong Wang and James C. 
Gee", title = "Leveraging Label-Specific Discriminant Mapping Features for Multi-Label Learning", journal = j-TKDD, volume = "13", number = "2", pages = "24:1--24:??", month = jun, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3319911", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3319911", abstract = "As an important machine learning task, multi-label learning deals with the problem where each sample instance (feature vector) is associated with multiple labels simultaneously. Most existing approaches focus on manipulating the label space, such as exploiting correlations between labels and reducing label space dimension, with identical feature space in the process of classification. One potential drawback of this traditional strategy is that each label might have its own specific characteristics and using identical features for all labels cannot lead to optimized performance. In this article, we propose an effective algorithm named LSDM, i.e., leveraging label-specific discriminant mapping features for multi-label learning, to overcome the drawback. LSDM sets diverse ratio parameter values to conduct cluster analysis on the positive and negative instances of identical label. It reconstructs label-specific feature space which includes distance information and spatial topology information. Our experimental results show that combining these two parts of information in the new feature representation can better exploit the clustering results in the learning process. Due to the problem of diverse combinations for identical label, we employ simplified linear discriminant analysis to efficiently excavate optimal one for each label and perform classification by querying the corresponding results. 
Comparison with the state-of-the-art algorithms on a total of 20 benchmark datasets clearly manifests the competitiveness of LSDM.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "24", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Gan:2019:SPS, author = "Wensheng Gan and Jerry Chun-Wei Lin and Philippe Fournier-Viger and Han-Chieh Chao and Philip S. Yu", title = "A Survey of Parallel Sequential Pattern Mining", journal = j-TKDD, volume = "13", number = "3", pages = "25:1--25:??", month = jul, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3314107", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3314107", abstract = "With the growing popularity of shared resources, large volumes of complex data of different types are collected automatically. Traditional data mining algorithms generally have problems and challenges including huge memory cost, low processing speed, and inadequate hard disk space. As a fundamental task of data mining, sequential pattern mining (SPM) is used in a wide variety of real-life applications. However, it is more complex and challenging than other pattern mining tasks, i.e., frequent itemset mining and association rule mining, and also suffers from the above challenges when handling the large-scale data. To solve these problems, mining sequential patterns in a parallel or distributed computing environment has emerged as an important issue with many applications. In this article, an in-depth survey of the current status of parallel SPM (PSPM) is investigated and provided, including detailed categorization of traditional serial SPM approaches, and state-of-the-art PSPM. 
We review the related work of PSPM in details including partition-based algorithms for PSPM, apriori-based PSPM, pattern-growth-based PSPM, and hybrid algorithms for PSPM, and provide deep description (i.e., characteristics, advantages, disadvantages, and summarization) of these parallel approaches of PSPM. Some advanced topics for PSPM, including parallel quantitative/weighted/utility SPM, PSPM from uncertain data and stream data, hardware acceleration for PSPM, are further reviewed in details. Besides, we review and provide some well-known open-source software of PSPM. Finally, we summarize some challenges and opportunities of PSPM in the big data era.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "25", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Mahmoudi:2019:RBO, author = "Amin Mahmoudi and Mohd Ridzwan Yaakub and Azuraliza Abu Bakar", title = "The Relationship between Online Social Network Ties and User Attributes", journal = j-TKDD, volume = "13", number = "3", pages = "26:1--26:??", month = jul, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3314204", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3314204", abstract = "The distance between users has an effect on the formation of social network ties, but it is not the only or even the main factor. Knowing all the features that influence such ties is very important for many related domains such as location-based recommender systems and community and event detection systems for online social networks (OSNs). In recent years, researchers have analyzed the role of user geo-location in OSNs. 
Researchers have also attempted to determine the probability of friendships being established based on distance, where friendship is not only a function of distance. However, some important features of OSNs remain unknown. In order to comprehensively understand the OSN phenomenon, we also need to analyze users' attributes. Basically, an OSN functions according to four main user properties: user geo-location, user weight, number of user interactions, and user lifespan. The research presented here sought to determine whether the user mobility pattern can be used to predict users' interaction behavior. It also investigated whether, in addition to distance, the number of friends (known as user weight) interferes in social network tie formation. To this end, we analyzed the above-stated features in three large-scale OSNs. We found that regardless of a high degree of freedom in user mobility, the fraction of the number of outside activities over the inside activity is a significant fraction that helps us to address the user interaction behavior. To the best of our knowledge, research has not been conducted elsewhere on this issue. We also present a high-resolution formula in order to improve the friendship probability function.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "26", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhou:2019:MTC, author = "Yao Zhou and Lei Ying and Jingrui He", title = "Multi-task Crowdsourcing via an Optimization Framework", journal = j-TKDD, volume = "13", number = "3", pages = "27:1--27:??", month = jul, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3310227", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3310227", abstract = "The unprecedented amounts of data have catalyzed the trend of combining human insights with machine learning techniques, which facilitate the use of crowdsourcing to enlist label information both effectively and efficiently. One crucial challenge in crowdsourcing is the diverse worker quality, which determines the accuracy of the label information provided by such workers. Motivated by the observations that same set of tasks are typically labeled by the same set of workers, we studied their behaviors across multiple related tasks and proposed an optimization framework for learning from task and worker dual heterogeneity. The proposed method uses a weight tensor to represent the workers' behaviors across multiple tasks, and seeks to find the optimal solution of the tensor by exploiting its structured information. Then, we propose an iterative algorithm to solve the optimization problem and analyze its computational complexity. To infer the true label of an example, we construct a worker ensemble based on the estimated tensor, whose decisions will be weighted using a set of entropy weight. We also prove that the gradient of the most time-consuming updating block is separable with respect to the workers, which leads to a randomized algorithm with faster speed. 
Moreover, we extend the learning framework to accommodate to the multi-class setting. Finally, we test the performance of our framework on several datasets, and demonstrate its superiority over state-of-the-art techniques.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "27", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2019:RRH, author = "Xuchao Zhang and Shuo Lei and Liang Zhao and Arnold P. Boedihardjo and Chang-Tien Lu", title = "Robust Regression via Heuristic Corruption Thresholding and Its Adaptive Estimation Variation", journal = j-TKDD, volume = "13", number = "3", pages = "28:1--28:??", month = jul, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3314105", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3314105", abstract = "The presence of data noise and corruptions has recently invoked increasing attention on robust least-squares regression (RLSR), which addresses this fundamental problem that learns reliable regression coefficients when response variables can be arbitrarily corrupted. Until now, the following important challenges could not be handled concurrently: (1) rigorous recovery guarantee of regression coefficients, (2) difficulty in estimating the corruption ratio parameter, and (3) scaling to massive datasets. This article proposes a novel Robust regression algorithm via Heuristic Corruption Thresholding (RHCT) that concurrently addresses all the above challenges. Specifically, the algorithm alternately optimizes the regression coefficients and estimates the optimal uncorrupted set via heuristic thresholding without a pre-defined corruption ratio parameter until its convergence. 
Moreover, to improve the efficiency of corruption estimation in large-scale data, a Robust regression algorithm via Adaptive Corruption Thresholding (RACT) is proposed to determine the size of the uncorrupted set in a novel adaptive search method without iterating data samples exhaustively. In addition, we prove that our algorithms benefit from strong guarantees analogous to those of state-of-the-art methods in terms of convergence rates and recovery guarantees. Extensive experiments demonstrate that the effectiveness of our new methods is superior to that of existing methods in the recovery of both regression coefficients and uncorrupted sets, with very competitive efficiency.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "28", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2019:IDP, author = "Zhitao Wang and Chengyao Chen and Wenjie Li", title = "Information Diffusion Prediction with Network Regularized Role-based User Representation Learning", journal = j-TKDD, volume = "13", number = "3", pages = "29:1--29:??", month = jul, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3314106", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3314106", abstract = "In this article, we aim at developing a user representation learning model to solve the information diffusion prediction problem in social media. The main idea is to project the diffusion users into a continuous latent space as the role-based (sender and receiver) representations, which capture unique diffusion characteristics of users. 
The model learns the role-based representations based on a cascade modeling objective that aims at maximizing the likelihood of observed cascades, and employs the matrix factorization objective of reconstructing structural proximities as a regularization on representations. By jointly embedding the information of cascades and network, the learned representations are robust on different diffusion data. We evaluate the proposed model on three real-world datasets. The experimental results demonstrate the better performance of the proposed model than state-of-the-art diffusion embedding and network embedding models and other popular graph-based methods.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "29", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ju:2019:TRB, author = "Fujiao Ju and Yanfeng Sun and Junbin Gao and Michael Antolovich and Junliang Dong and Baocai Yin", title = "Tensorizing Restricted {Boltzmann} Machine", journal = j-TKDD, volume = "13", number = "3", pages = "30:1--30:??", month = jul, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3321517", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3321517", abstract = "Restricted Boltzmann machine (RBM) is a famous model for feature extraction and can be used as an initializer for neural networks. When applying the classic RBM to multidimensional data such as 2D/3D tensors, one needs to vectorize such high-order data. Vectorizing will result in dimensional disaster and valuable spatial information loss. As RBM is a model with fully connected layers, it requires a large amount of memory. Therefore, it is difficult to use RBM with high-order data on low-end devices. 
In this article, to utilize classic RBM on tensorial data directly, we propose a new tensorial RBM model parameterized by the tensor train format (TTRBM). In this model, both visible and hidden variables are in tensorial form, which are connected by a parameter matrix in tensor train format. The biggest advantage of the proposed model is that TTRBM can obtain comparable performance compared with the classic RBM with much fewer model parameters and faster training process. To demonstrate the advantages of TTRBM, we conduct three real-world applications, face reconstruction, handwritten digit recognition, and image super-resolution in the experiments.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "30", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2019:LKI, author = "Chenyang Liu and Jian Cao and Shanshan Feng", title = "Leveraging Kernel-Incorporated Matrix Factorization for App Recommendation", journal = j-TKDD, volume = "13", number = "3", pages = "31:1--31:??", month = jul, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3320482", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3320482", abstract = "The ever-increasing number of smartphone applications (apps) available on different app markets poses a challenge for personalized app recommendation. Conventional collaborative filtering-based recommendation methods suffer from sparse and binary user-app implicit feedback, which results in poor performance in discriminating user-app preferences. 
In this article, we first propose two kernel incorporated probabilistic matrix factorization models, which introduce app-categorical information to constrain the user and app latent features to be similar to their neighbors in the latent space. The two models are solved by Stochastic Gradient Descent with a user-oriented negative sampling scheme. To further improve the recommendation performance, we construct pseudo user-app ratings based on user-app usage information, and propose a novel kernelized non-negative matrix factorization by incorporating non-negative constraints on latent factors to predict user-app preferences. This model also leverages user--user and app--app similarities with regard to app-categorical information to mine the latent geometric structure in the pseudo-rating space. Adopting the Karush--Kuhn--Tucker conditions, a Multiplicative Updating Rules based optimization is proposed for model learning, and the convergence is proved by introducing an auxiliary function. The experimental results on a real user-app installation usage dataset show the comparable performance of our models with the state-of-the-art baselines in terms of two ranking-oriented evaluation metrics.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "31", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Dehghan:2019:TDE, author = "Mahdi Dehghan and Ahmad Ali Abin", title = "Translations Diversification for Expert Finding: a Novel Clustering-based Approach", journal = j-TKDD, volume = "13", number = "3", pages = "32:1--32:??", month = jul, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3320489", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3320489", abstract = "Expert finding is the task of retrieving and ranking knowledgeable people in the subject of user's query. It is a well-studied problem that has attracted the attention of many researchers. The most important challenge in expert finding is to determine the similarity between query words and documents authored by candidate experts. One of the most important challenges in Information Retrieval (IR) community is the issue of vocabulary gap between queries and documents. In this study, a translation model based on words clustering in two query and co-occurrence spaces is proposed to overcome this problem. First, the words that are semantically close, are clustered in a query space and then each cluster in this space are clustered again in a co-occurrence space. Representatives of each cluster in the co-occurrence space are considered as a diverse subset of the parent cluster. By this method, the query translations are expected to be diversified in the query space. Next, a probabilistic model, that is based on the belonging degree of word to cluster and similarity of cluster to query in the query space, is used to consider the problem of vocabulary gap. Finally, the corresponding translations to each query are used in conjunction with a combination model for expert finding. 
Experiments on Stack Overflow dataset show the effectiveness of the proposed method for expert finding.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "32", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Iqbal:2019:BPE, author = "Mohsin Iqbal and Asim Karim and Faisal Kamiran", title = "Balancing Prediction Errors for Robust Sentiment Classification", journal = j-TKDD, volume = "13", number = "3", pages = "33:1--33:??", month = jul, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3328795", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3328795", abstract = "Sentiment classification is a popular text mining task in which textual content (e.g., a message) is assigned a polarity label (typically positive or negative) reflecting the sentiment expressed in it. Sentiment classification is used widely in applications like customer feedback analysis where robustness and correctness of results are critical. In this article, we highlight that prediction accuracy alone is not sufficient for assessing the performance of a sentiment classifier; it is also important that the classifier is not biased toward positive or negative polarity, thus distorting the distribution of positive and negative messages in the predictions. We propose a measure, called Polarity Bias Rate, for quantifying this bias in a sentiment classifier. Second, we present two methods for removing this bias in the predictions of unsupervised and supervised sentiment classifiers. Our first method, called Bias-Aware Thresholding (BAT), shifts the decision boundary to control the bias in the predictions. 
Motivated from cost-sensitive learning, BAT is easily applicable to both lexicon-based unsupervised and supervised classifiers. Our second method, called Balanced Logistic Regression (BLR) introduces a bias-remover constraint into the standard logistic regression model. BLR is an automatic bias-free supervised sentiment classifier. We evaluate our methods extensively on seven real-world datasets. The experiments involve two lexicon-based and two supervised sentiment classifiers and include evaluation on multiple train-test data sizes. The results show that bias is controlled effectively in predictions. Furthermore, prediction accuracy is also increased in many cases, thus enhancing the robustness of sentiment classification.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "33", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2019:ICS, author = "Mingyue Zhang and Xuan Wei and Xunhua Guo and Guoqing Chen and Qiang Wei", title = "Identifying Complements and Substitutes of Products: a Neural Network Framework Based on Product Embedding", journal = j-TKDD, volume = "13", number = "3", pages = "34:1--34:??", month = jul, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3320277", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3320277", abstract = "Complements and substitutes are two typical product relationships that deserve consideration in online product recommendation. One of the key objectives of recommender systems is to promote cross-selling, which heavily relies on recommending the appropriate type of products in specific scenarios. 
Research on consumer behavior has shown that consumers usually prefer substitutes in the browsing stage whereas complements in the purchasing stage. Thus, it is of great importance to identify the complementary and substitutable relationships between products. In this article, we design a neural network based framework that integrates the textual content and non-textual information of online reviews to mine product relationships. For the textual content, we utilize methods such as LDA topic modeling to represent products in a succinct form called ``embedding.'' To capture the semantics of complementary and substitutable relationships, we design a modeling process that transfers the product embeddings into semantic features and incorporates additional non-textual factors of product reviews. Extensive experiments are conducted to verify the effectiveness of the proposed product relationship mining model. The advantages and robustness of our model are discussed from various perspectives.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "34", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Huang:2019:RNC, author = "Yourong Huang and Zhu Xiao and Xiaoyou Yu and Dong Wang and Vincent Havyarimana and Jing Bai", title = "Road Network Construction with Complex Intersections Based on Sparsely Sampled Private Car Trajectory Data", journal = j-TKDD, volume = "13", number = "3", pages = "35:1--35:??", month = jul, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3326060", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3326060", abstract = "A road network is a critical aspect of both urban planning and route recommendation. 
This article proposes an efficient approach to build a fine-grained road network based on sparsely sampled private car trajectory data under complex urban environment. In order to resolve difficulties introduced by low sampling rate trajectory data, we concentrate sample points around intersections by utilizing the turning characteristics from the large-scale trajectory data to ensure the accuracy of the detection of intersections and road segments. In front of complex road networks including many complex intersections, such as the overpasses and underpasses, we first layer intersections into major and minor one, and then propose a simplified representation of intersections and corresponding computable model based on the features of roads, which can significantly improve the accuracy of detected road networks, especially for the complex intersections. In order to construct fine-grained road networks, we distinguish various types of intersections using direction information and detected turning limit. To the best of our knowledge, our road network building method is the first time to give fine-grained road networks based on low-sampling rate private car trajectory data, especially able to infer the location of complex intersections and its connections to other intersections. Last but not the least, we propose an effective parameter selection process for the Density-Based Spatial Clustering of Applications with Noise based clustering algorithm, which is used to implement the reliable intersection detection. Extensive evaluations are conducted based on a real-world trajectory dataset from 1,345 private cars in Futian district, Shenzhen city of China. The results demonstrate the effectiveness of the proposed method. 
The constructed road network matches close to the one from a public editing map OpenStreetMap, especially the location of the road intersections and road segments, which achieves 92.2\% intersections within 20m and 91.6\% road segments within 8m.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "35", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Dornaika:2019:ATP, author = "Fadi Dornaika", title = "Active Two Phase Collaborative Representation Classifier", journal = j-TKDD, volume = "13", number = "4", pages = "36:1--36:??", month = aug, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3326919", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3326919", abstract = "The Sparse Representation Classifier, the Collaborative Representation Classifier (CRC), and the Two Phase Test Sample Sparse Representation (TPTSSR) classifier were introduced in recent times. All these frameworks are supervised and passive in the sense that they cannot benefit from unlabeled data samples. In this paper, inspired by active learning paradigms, we introduce an active CRC that can be used by these frameworks. More precisely, we are interested in the TPTSSR framework due to its good performance and its reasonable computational cost. Our proposed Active Two Phase Collaborative Representation Classifier (ATPCRC) starts by predicting the label of the available unlabeled samples. At testing stage, two coding processes are carried out separately on the set of originally labeled samples and the whole set (original and predicted label). The two types of class-wise reconstruction errors are blended in order to decide the class of any test image. 
Experiments conducted on four public image datasets show that the proposed ATPCRC can outperform the classic TPTSSR as well as many state-of-the-art methods that exploit label and unlabeled data samples.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "36", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yang:2019:TSV, author = "Wenmian Yang and Kun Wang and Na Ruan and Wenyuan Gao and Weijia Jia and Wei Zhao and Nan Liu and Yunyong Zhang", title = "Time-Sync Video Tag Extraction Using Semantic Association Graph", journal = j-TKDD, volume = "13", number = "4", pages = "37:1--37:??", month = aug, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3332932", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3332932", abstract = "Time-sync comments (TSCs) reveal a new way of extracting the online video tags. However, such TSCs have lots of noises due to users' diverse comments, introducing great challenges for accurate and fast video tag extractions. In this article, we propose an unsupervised video tag extraction algorithm named Semantic Weight-Inverse Document Frequency (SW-IDF). Specifically, we first generate corresponding semantic association graph (SAG) using semantic similarities and timestamps of the TSCs. Second, we propose two graph cluster algorithms, i.e., dialogue-based algorithm and topic center-based algorithm, to deal with the videos with different density of comments. Third, we design a graph iteration algorithm to assign the weight to each comment based on the degrees of the clustered subgraphs, which can differentiate the meaningful comments from the noises. 
Finally, we gain the weight of each word by combining Semantic Weight (SW) and Inverse Document Frequency (IDF). In this way, the video tags are extracted automatically in an unsupervised way. Extensive experiments have shown that SW-IDF (dialogue-based algorithm) achieves 0.4210 F1-score and 0.4932 MAP (Mean Average Precision) in high-density comments, 0.4267 F1-score and 0.3623 MAP in low-density comments; while SW-IDF (topic center-based algorithm) achieves 0.4444 F1-score and 0.5122 MAP in high-density comments, 0.4207 F1-score and 0.3522 MAP in low-density comments. It has a better performance than the state-of-the-art unsupervised algorithms in both F1-score and MAP.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "37", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Amelkin:2019:DMA, author = "Victor Amelkin and Petko Bogdanov and Ambuj K. Singh", title = "A Distance Measure for the Analysis of Polar Opinion Dynamics in Social Networks", journal = j-TKDD, volume = "13", number = "4", pages = "38:1--38:??", month = aug, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3332168", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3332168", abstract = "Analysis of opinion dynamics in social networks plays an important role in today's life. For predicting users' political preference, it is particularly important to be able to analyze the dynamics of competing polar opinions, such as pro-Democrat vs. pro-Republican. While observing the evolution of polar opinions in a social network over time, can we tell when the network evolved abnormally? Furthermore, can we predict how the opinions of the users will change in the future? 
To answer such questions, it is insufficient to study individual user behavior, since opinions can spread beyond users' ego-networks. Instead, we need to consider the opinion dynamics of all users simultaneously and capture the connection between the individuals' behavior and the global evolution pattern of the social network. In this work, we introduce the Social Network Distance (SND)---a distance measure that quantifies the likelihood of evolution of one snapshot of a social network into another snapshot under a chosen model of polar opinion dynamics. SND has a rich semantics of a transportation problem, yet, is computable in time linear in the number of users and, as such, is applicable to large-scale online social networks. In our experiments with synthetic and Twitter data, we demonstrate the utility of our distance measure for anomalous event detection. It achieves a true positive rate of 0.83, twice as high as that of alternatives. The same predictions presented in precision-recall space show that SND retains perfect precision for recall up to 0.2. Its precision then decreases while maintaining more than 2-fold improvement over alternatives for recall up to 0.95. When used for opinion prediction in Twitter data, SND's accuracy is 75.6\%, which is 7.5\% higher than that of the next best method.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "38", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chen:2019:MCP, author = "Haoran Chen and Jinghua Li and Junbin Gao and Yanfeng Sun and Yongli Hu and Baocai Yin", title = "Maximally Correlated Principal Component Analysis Based on Deep Parameterization Learning", journal = j-TKDD, volume = "13", number = "4", pages = "39:1--39:??", month = aug, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3332183", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3332183", abstract = "Dimensionality reduction is widely used to deal with high-dimensional data. As a famous dimensionality reduction method, principal component analysis (PCA) aiming at finding the low dimension feature of original data has made great successes, and many improved PCA algorithms have been proposed. However, most algorithms based on PCA only consider the linear correlation of data features. In this article, we propose a novel dimensionality reduction model called maximally correlated PCA based on deep parameterization learning (MCPCADP), which takes nonlinear correlation into account in the deep parameterization framework for the purpose of dimensionality reduction. The new model explores nonlinear correlation by maximizing Ky-Fan norm of the covariance matrix of nonlinearly mapped data features. A new BP algorithm for model optimization is derived. In order to assess the proposed method, we conduct experiments on both a synthetic database and several real-world databases. The experimental results demonstrate that the proposed algorithm is comparable to several widely used algorithms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "39", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Gallardo:2019:IVE, author = "Laura Fern{\'a}ndez Gallardo and Ramon Sanchez-Iborra", title = "On the Impact of Voice Encoding and Transmission on the Predictions of Speaker Warmth and Attractiveness", journal = j-TKDD, volume = "13", number = "4", pages = "40:1--40:??", month = aug, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3332146", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3332146", abstract = "Modern human-computer interaction systems may not only be based on interpreting natural language but also on detecting speaker interpersonal characteristics in order to determine dialog strategies. This may be of high interest in different fields such as telephone marketing or automatic voice-based interactive services. However, when such systems encounter signals transmitted over a communication network instead of clean speech, e.g., in call centers, the speaker characterization accuracy might be impaired by the degradations caused in the speech signal by the encoding and communication processes. This article addresses a binary classification of high versus low warm--attractive speakers over different channel and encoding conditions. The ground truth is derived from ratings given to clean speech extracted from an extensive subjective test. Our results show that, under the considered conditions, the AMR-WB+ codec permits good levels of classification accuracy, comparable to the classification with clean, non-degraded speech. This is especially notable for the case of a Random Forest-based classifier, which presents the best performance among the set of evaluated algorithms. 
The impact of different packet loss rates has been examined, whereas jitter effects have been found to be negligible.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "40", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Comito:2019:BED, author = "Carmela Comito and Agostino Forestiero and Clara Pizzuti", title = "Bursty Event Detection in {Twitter} Streams", journal = j-TKDD, volume = "13", number = "4", pages = "41:1--41:??", month = aug, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3332185", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3332185", abstract = "Social media, in recent years, have become an invaluable source of information for both public and private organizations to enhance the comprehension of people interests and the onset of new events. Twitter, especially, allows a fast spread of news and events happening real time that can contribute to situation awareness during emergency situations, but also to understand trending topics of a period. The article proposes an online algorithm that incrementally groups tweet streams into clusters. The approach summarizes the examined tweets into the cluster centroid by maintaining a number of textual and temporal features that allow the method to effectively discover groups of interest on particular themes. Experiments on messages posted by users addressing different issues, and a comparison with state-of-the-art approaches show that the method is capable to detect discussions regarding topics of interest, but also to distinguish bursty events revealed by a sudden spreading of attention on messages published by users.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "41", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Qiang:2019:HLT, author = "Jipeng Qiang and Ping Chen and Wei Ding and Tong Wang and Fei Xie and Xindong Wu", title = "Heterogeneous-Length Text Topic Modeling for Reader-Aware Multi-Document Summarization", journal = j-TKDD, volume = "13", number = "4", pages = "42:1--42:??", month = aug, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3333030", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3333030", abstract = "More and more user comments like Tweets are available, which often contain user concerns. In order to meet the demands of users, a good summary generating from multiple documents should consider reader interests as reflected in reader comments. In this article, we focus on how to generate a summary from multi-document documents by considering reader comments, named as reader-aware multi-document summarization (RA-MDS). We present an innovative topic-based method for RA-MDS, which exploits latent topics to obtain the most salient and lessen redundancy summary from multiple documents. Since finding latent topics for RA-MDS is a crucial step, we also present a Heterogeneous-length Text Topic Modeling (HTTM) to extract topics from the corpus that includes both news reports and user comments, denoted as heterogeneous-length texts. In this case, the latent topics extracted by HTTM cover not only important aspects of the event, but also aspects that attract reader interests. Comparisons on summary benchmark datasets also confirm that the proposed RA-MDS method is effective in improving the quality of extracted summaries. 
In addition, experimental results demonstrate that the proposed topic modeling method outperforms existing topic modeling algorithms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "42", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2019:HDE, author = "Qingyang Li and Zhiwen Yu and Bin Guo and Huang Xu and Xinjiang Lu", title = "Housing Demand Estimation Based on Express Delivery Data", journal = j-TKDD, volume = "13", number = "4", pages = "43:1--43:??", month = aug, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3332522", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3332522", abstract = "Housing demand estimation is an important topic in the field of economic research. It is beneficial and helpful for various applications including real estate market regulation and urban planning, and therefore is crucial for both real estate investors and government administrators. Meanwhile, given the rapid development of the express industry, abundant useful information is embedded in express delivery records, which is helpful for researchers in profiling urban life patterns. The express delivery behaviors of the residents in a residential community can reflect the housing demand to some extent. Although housing demand has been analyzed in previous studies, its estimation has not been very good, and the subject remains under explored. To this end, in this article, we propose a systematic housing demand estimation method based on express delivery data. First, the express delivery records are aggregated on the community scale with the use of clustering methods, and the missing values in the records are completed. 
Then, various features are extracted from a less sparse dataset considering both the probability of residential mobility and the attractiveness of residential communities. In addition, given that the correlations between different districts can influence the performances of the inference model, the commonalities and differences of different districts are considered. After the features and correlations between different districts have been obtained, the housing demand is estimated by using a multi-task learning method based on neural networks. The experimental results for real-world data show that the proposed model is effective at estimating the housing demand at the residential community level.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "43", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Sajadmanesh:2019:CTR, author = "Sina Sajadmanesh and Sogol Bazargani and Jiawei Zhang and Hamid R. Rabiee", title = "Continuous-Time Relationship Prediction in Dynamic Heterogeneous Information Networks", journal = j-TKDD, volume = "13", number = "4", pages = "44:1--44:??", month = aug, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3333028", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3333028", abstract = "Online social networks, World Wide Web, media, and technological networks, and other types of so-called information networks are ubiquitous nowadays. These information networks are inherently heterogeneous and dynamic. They are heterogeneous as they consist of multi-typed objects and relations, and they are dynamic as they are constantly evolving over time. 
One of the challenging issues in such heterogeneous and dynamic environments is to forecast those relationships in the network that will appear in the future. In this article, we try to solve the problem of continuous-time relationship prediction in dynamic and heterogeneous information networks. This implies predicting the time it takes for a relationship to appear in the future, given its features that have been extracted by considering both heterogeneity and temporal dynamics of the underlying network. To this end, we first introduce a feature extraction framework that combines the power of meta-path-based modeling and recurrent neural networks to effectively extract features suitable for relationship prediction regarding heterogeneity and dynamicity of the networks. Next, we propose a supervised non-parametric approach, called Non-Parametric Generalized Linear Model (Np-Glm), which infers the hidden underlying probability distribution of the relationship building time given its features. We then present a learning algorithm to train Np-Glm and an inference method to answer time-related queries. Extensive experiments conducted on synthetic data and three real-world datasets, namely Delicious, MovieLens, and DBLP, demonstrate the effectiveness of Np-Glm in solving continuous-time relationship prediction problem vis-{\`a}-vis competitive baselines.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "44", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ben-Gal:2019:CUT, author = "Irad Ben-Gal and Shahar Weinstock and Gonen Singer and Nicholas Bambos", title = "Clustering Users by Their Mobility Behavioral Patterns", journal = j-TKDD, volume = "13", number = "4", pages = "45:1--45:??", month = aug, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3322126", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3322126", abstract = "The immense stream of data from mobile devices during recent years enables one to learn more about human behavior and provide mobile phone users with personalized services. In this work, we identify clusters of users who share similar mobility behavioral patterns. We analyze trajectories of semantic locations to find users who have similar mobility ``lifestyle,'' even when they live in different areas. For this task, we propose a new grouping scheme that is called Lifestyle-Based Clustering (LBC). We represent the mobility movement of each user by a Markov model and calculate the Jensen-Shannon distances among pairs of users. The pairwise distances are represented by a similarity matrix, which is used for the clustering. To validate the unsupervised clustering task, we develop an entropy-based clustering measure, namely, an index that measures the homogeneity of mobility patterns within clusters of users. The analysis is validated on a real-world dataset that contains location-movements of 50,000 cellular phone users that were analyzed over a two-month period.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "45", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Xu:2019:FGA, author = "Yanan Xu and Yanmin Zhu and Yanyan Shen and Jiadi Yu", title = "Fine-Grained Air Quality Inference with Remote Sensing Data and Ubiquitous Urban Data", journal = j-TKDD, volume = "13", number = "5", pages = "46:1--46:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3340847", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3340847", abstract = "Air quality has gained much attention in recent years and is of great importance to protecting people's health. Due to the influence of multiple factors, the limited air quality monitoring stations deployed in cities are unable to provide fine-grained air quality information. One cost-effective way is to infer air quality with records from existing monitoring stations. However, the severe data sparsity problem (e.g., only 0.2\% data are known) leads to the failure of most inference methods. We observe that remote sensing data are of high quality and have a strong correlation with the air quality. Therefore, we propose to integrate remote sensing data and ubiquitous urban data for the air quality inference. But there are two main challenges, i.e., data heterogeneity and incompleteness of the remote sensing data. To address the challenges, we propose a two-stage approach. In the first stage, we infer and predict air quality conditions of some places leveraging the remote sensing data and meteorological data with two proposed ANN-based methods, respectively. This stage significantly alleviates the data sparsity problem. In the second stage, the records and estimated air quality data are put in a tensor. 
A tensor decomposition method is applied to complete the tensor. The features extracted from urban data are classified into the spatial features (i.e., road features and POI features) and the temporal features (i.e., meteorological features) as the constraints to further address the data sparsity problem. In addition, an iterative training framework is proposed to improve the inference performance. Experiments on a real-world dataset show that our approach outperforms state-of-the-art methods, such as U-Air.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "46", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhou:2019:PMM, author = "Xiren Zhou and Huanhuan Chen and Jinlong Li", title = "Probabilistic Mixture Model for Mapping the Underground Pipes", journal = j-TKDD, volume = "13", number = "5", pages = "47:1--47:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3344721", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3344721", abstract = "Buried pipes beneath our city are blood vessels that feed human civilization through the supply of water, gas, electricity, and so on, and mapping the buried pipes has long been addressed as an issue. In this article, a suitable coordinate of the detected area is established, the noisy Ground Penetrating Radar (GPR) and Global Positioning System (GPS) data are analyzed and normalized, and the pipeline is described mathematically. Based on these, the Probabilistic Mixture Model is proposed to map the buried pipes, which takes discrete noisy GPR and GPS data as the input and the accurate pipe locations and directions as the output. 
The proposed model consists of the Preprocessing, the Pipe Fitting algorithm, the Classification Fitting Expectation Maximization (CFEM) algorithm, and the Angle-limited Hough (Al-Hough) transform. The direction information of the detecting point is added into the measuring of the distance from the point to nearby pipelines, to handle some areas where the pipes are intersected or difficult to classify. The Expectation Maximization (EM) algorithm is upgraded to CFEM algorithm that is able to classify detecting points into different classes, and connect and fit multiple points in each class to get accurate pipeline locations and directions, and the Al-Hough transform provides reliable initializations for CFEM, to some extent, ensuring the convergence of the proposed model. The experimental results on the simulated and real-world datasets demonstrate the effectiveness of the proposed model.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "47", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Jiang:2019:BMS, author = "Fei Jiang and Guosheng Yin and Francesca Dominici", title = "{Bayesian} Model Selection Approach to Multiple Change-Points Detection with Non-Local Prior Distributions", journal = j-TKDD, volume = "13", number = "5", pages = "48:1--48:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3340804", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3340804", abstract = "We propose a Bayesian model selection (BMS) boundary detection procedure using non-local prior distributions for a sequence of data with multiple systematic mean changes. 
By using the non-local priors in the BMS framework, the BMS method can effectively suppress the non-boundary spike points with large instantaneous changes. Further, we speedup the algorithm by reducing the multiple change points to a series of single change point detection problems. We establish the consistency of the estimated number and locations of the change points under various prior distributions. From both theoretical and numerical perspectives, we show that the non-local inverse moment prior leads to the fastest convergence rate in identifying the true change points on the boundaries. Extensive simulation studies are conducted to compare the BMS with existing methods, and our method is illustrated with application to the magnetic resonance imaging guided radiation therapy data.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "48", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2019:RTE, author = "Yun Wang and Guojie Song and Lun Du and Zhicong Lu", title = "Real-Time Estimation of the Urban Air Quality with Mobile Sensor System", journal = j-TKDD, volume = "13", number = "5", pages = "49:1--49:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3356584", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3356584", abstract = "Recently, real-time air quality estimation has attracted more and more attention from all over the world, which is close to our daily life. With the prevalence of mobile sensors, there is an emerging way to monitor the air quality with mobile sensors on vehicles. 
Compared with traditional expensive monitor stations, mobile sensors are cheaper and more abundant, but observations from these sensors have unstable spatial and temporal distributions, which results in the existing model could not work very well on this type of data. In this article, taking advantage of air quality data from mobile sensors, we propose an real-time urban air quality estimation method based on the Gaussian Process Regression for air pollution of the unmonitored areas, pivoting on the diffusion effect and the accumulation effect of air pollution. In order to meet the real-time demands, we propose a two-layer ensemble learning framework and a self-adaptivity mechanism to improve computational efficiency and adaptivity. We evaluate our model with real data from mobile sensor system located in Beijing, China. And the experiments show that our proposed model is superior to the state-of-the-art spatial regression methods in both precision and time performances.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "49", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Xue:2019:SAP, author = "Yu Xue and Bing Xue and Mengjie Zhang", title = "Self-Adaptive Particle Swarm Optimization for Large-Scale Feature Selection in Classification", journal = j-TKDD, volume = "13", number = "5", pages = "50:1--50:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3340848", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3340848", abstract = "Many evolutionary computation (EC) methods have been used to solve feature selection problems and they perform well on most small-scale feature selection problems. 
However, as the dimensionality of feature selection problems increases, the solution space increases exponentially. Meanwhile, there are more irrelevant features than relevant features in datasets, which leads to many local optima in the huge solution space. Therefore, the existing EC methods still suffer from the problem of stagnation in local optima on large-scale feature selection problems. Furthermore, large-scale feature selection problems with different datasets may have different properties. Thus, it may be of low performance to solve different large-scale feature selection problems with an existing EC method that has only one candidate solution generation strategy (CSGS). In addition, it is time-consuming to find a suitable EC method and corresponding suitable parameter values for a given large-scale feature selection problem if we want to solve it effectively and efficiently. In this article, we propose a self-adaptive particle swarm optimization (SaPSO) algorithm for feature selection, particularly for large-scale feature selection. First, an encoding scheme for the feature selection problem is employed in the SaPSO. Second, three important issues related to self-adaptive algorithms are investigated. After that, the SaPSO algorithm with a typical self-adaptive mechanism is proposed. The experimental results on 12 datasets show that the solution size obtained by the SaPSO algorithm is smaller than its EC counterparts on all datasets. The SaPSO algorithm performs better than its non-EC and EC counterparts in terms of classification accuracy not only on most training sets but also on most test sets. Furthermore, as the dimensionality of the feature selection problem increases, the advantages of SaPSO become more prominent. This highlights that the SaPSO algorithm is suitable for solving feature selection problems, particularly large-scale feature selection problems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "50", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Crescenzi:2019:HCM, author = "Valter Crescenzi and Paolo Merialdo and Disheng Qiu", title = "Hybrid Crowd-Machine Wrapper Inference", journal = j-TKDD, volume = "13", number = "5", pages = "51:1--51:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3344720", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3344720", abstract = "Wrapper inference deals in generating programs to extract data from Web pages. Several supervised and unsupervised wrapper inference approaches have been proposed in the literature. On one hand, unsupervised approaches produce erratic wrappers: whenever the sources do not satisfy underlying assumptions of the inference algorithm, their accuracy is compromised. On the other hand, supervised approaches produce accurate wrappers, but since they need training data, their scalability is limited. The recent advent of crowdsourcing platforms has opened new opportunities for supervised approaches, as they make possible the production of large amounts of training data with the support of workers recruited online. Nevertheless, involving human workers has monetary costs. We present an original hybrid crowd-machine wrapper inference system that offers the benefits of both approaches exploiting the cooperation of crowd workers and unsupervised algorithms. Based on a principled probabilistic model that estimates the quality of wrappers, humans workers are recruited only when unsupervised wrapper induction algorithms are not able to produce sufficiently accurate solutions.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "51", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{He:2019:KSA, author = "Kun He and Pan Shi and David Bindel and John E. Hopcroft", title = "{Krylov} Subspace Approximation for Local Community Detection in Large Networks", journal = j-TKDD, volume = "13", number = "5", pages = "52:1--52:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3340708", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3340708", abstract = "Community detection is an important information mining task to uncover modular structures in large networks. For increasingly common large network datasets, global community detection is prohibitively expensive, and attention has shifted to methods that mine local communities, i.e., identifying all latent members of a particular community from a few labeled seed members. To address such semi-supervised mining task, we systematically develop a local spectral (LOSP) subspace-based community detection method, called LOSP. We define a family of LOSP subspaces based on Krylov subspaces, and seek a sparse indicator for the target community via an $ l_1 $ norm minimization over the Krylov subspace. Variants of LOSP depend on type of random walks with different diffusion speeds, type of random walks, dimension of the LOSP subspace, and step of diffusions. The effectiveness of the proposed LOSP approach is theoretically analyzed based on Rayleigh quotients, and it is experimentally verified on a wide variety of real-world networks across social, production, and biological domains, as well as on an extensive set of synthetic LFR benchmark datasets.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "52", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Bergamini:2019:CTK, author = "Elisabetta Bergamini and Michele Borassi and Pierluigi Crescenzi and Andrea Marino and Henning Meyerhenke", title = "Computing top-$k$ Closeness Centrality Faster in Unweighted Graphs", journal = j-TKDD, volume = "13", number = "5", pages = "53:1--53:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3344719", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3344719", abstract = "Given a connected graph $ G = (V, E) $, where $V$ denotes the set of nodes and $E$ the set of edges of the graph, the length (that is, the number of edges) of the shortest path between two nodes $v$ and $w$ is denoted by $ d(v, w)$. The closeness centrality of a vertex $v$ is then defined as $ (n - 1) / \sum_{w \in V} d(v, w)$, where $ n = | V |$. This measure is widely used in the analysis of real-world complex networks, and the problem of selecting the $k$ most central vertices has been deeply analyzed in the last decade. However, this problem is computationally not easy, especially for large networks: in the first part of the article, we prove that it is not solvable in time $ O(| E |^{2 - \epsilon })$ on directed graphs, for any constant $ \epsilon > 0$, under reasonable complexity assumptions. Furthermore, we propose a new algorithm for selecting the $k$ most central nodes in a graph: we experimentally show that this algorithm improves significantly both the textbook algorithm, which is based on computing the distance between all pairs of vertices, and the state of the art. For example, we are able to compute the top $k$ nodes in few dozens of seconds in real-world networks with millions of nodes and edges.
Finally, as a case study, we compute the 10 most central actors in the Internet Movie Database (IMDB) collaboration network, where two actors are linked if they played together in a movie, and in the Wikipedia citation network, which contains a directed edge from a page $p$ to a page $q$ if $p$ contains a link to $q$.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "53", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Tatti:2019:DFG, author = "Nikolaj Tatti", title = "Density-Friendly Graph Decomposition", journal = j-TKDD, volume = "13", number = "5", pages = "54:1--54:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3344210", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:02 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3344210", abstract = "Decomposing a graph into a hierarchical structure via $k$-core analysis is a standard operation in any modern graph-mining toolkit. $k$-core decomposition is a simple and efficient method that allows to analyze a graph beyond its mere degree distribution. More specifically, it is used to identify areas in the graph of increasing centrality and connectedness, and it allows to reveal the structural organization of the graph. Despite the fact that $k$-core analysis relies on vertex degrees, $k$-cores do not satisfy a certain, rather natural, density property. Simply put, the most central $k$-core is not necessarily the densest subgraph. This inconsistency between $k$-cores and graph density provides the basis of our study. 
We start by defining what it means for a subgraph to be locally dense, and we show that our definition entails a nested chain decomposition of the graph, similar to the one given by $k$-cores, but in this case the components are arranged in order of increasing density. We show that such a locally dense decomposition for a graph $ G = (V, E)$ can be computed in polynomial time. The running time of the exact decomposition algorithm is $ O(| V |^2 | E |)$ but is significantly faster in practice. In addition, we develop a linear-time algorithm that provides a factor-2 approximation to the optimal locally dense decomposition. Furthermore, we show that the $k$-core decomposition is also a factor-2 approximation, however, as demonstrated by our experimental evaluation, in practice $k$-cores have different structure than locally dense subgraphs, and as predicted by the theory, $k$-cores are not always well-aligned with graph density.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "54", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhu:2019:AAL, author = "Peisong Zhu and Zhuang Chen and Haojie Zheng and Tieyun Qian", title = "Aspect Aware Learning for Aspect Category Sentiment Analysis", journal = j-TKDD, volume = "13", number = "6", pages = "55:1--55:??", month = dec, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3350487", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:03 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3350487", abstract = "Aspect category sentiment analysis (ACSA) is an underexploited subtask in aspect level sentiment analysis. It aims to identify the sentiment of predefined aspect categories. 
The main challenge in ACSA comes from the fact that the aspect category may not occur in the sentence in most of the cases. For example, the review ``they have delicious sandwiches'' positively talks about the aspect category ``food'' in an implicit manner. In this article, we propose a novel aspect aware learning (AAL) framework for ACSA tasks. Our key idea is to exploit the interaction between the aspect category and the contents under the guidance of both sentiment polarity and predefined categories. To this end, we design a two-way memory network for integrating AAL into the framework of sentiment classification. We further present two algorithms to incorporate the potential impacts of aspect categories. One is to capture the correlations between aspect terms and the aspect category like ``sandwiches'' and ``food.'' The other is to recognize the aspect category for sentiment representations like ``food'' for ``delicious.'' We conduct extensive experiments on four SemEval datasets. The results reveal the essential role of AAL in ACSA by achieving the state-of-the-art performance.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov.
Data", articleno = "55", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2019:UFM, author = "Yuandong Wang and Xuelian Lin and Hua Wei and Tianyu Wo and Zhou Huang and Yong Zhang and Jie Xu", title = "A Unified Framework with Multi-source Data for Predicting Passenger Demands of Ride Services", journal = j-TKDD, volume = "13", number = "6", pages = "56:1--56:??", month = dec, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3355563", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:03 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3355563", abstract = "Ride-hailing applications have been offering convenient ride services for people in need. However, such applications still suffer from the issue of supply-demand disequilibrium, which is a typical problem for traditional taxi services. With effective predictions on passenger demands, we can alleviate the disequilibrium by pre-dispatching, dynamic pricing or avoiding dispatching cars to zero-demand areas. Existing studies of demand predictions mainly utilize limited data sources, trajectory data, or orders of ride services or both of them, which also lacks a multi-perspective consideration. In this article, we present a unified framework with a new combined model and a road-network-based spatial partition to leverage multi-source data and model the passenger demands from temporal, spatial, and zero-demand-area perspectives. In addition, our framework realizes offline training and online predicting, which can satisfy the real-time requirement more easily. We analyze and evaluate the performance of our combined model using the actual operational data from UCAR. 
The experimental results indicate that our model outperforms baselines on both Mean Absolute Error and Root Mean Square Error on average.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "56", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2019:CLL, author = "Shenghua Liu and Huawei Shen and Houdong Zheng and Xueqi Cheng and Xiangwen Liao", title = "{CT LIS}: Learning Influences and Susceptibilities through Temporal Behaviors", journal = j-TKDD, volume = "13", number = "6", pages = "57:1--57:??", month = dec, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3363570", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:03 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3363570", abstract = "How to quantify influences between users, seeing that social network users influence each other in their temporal behaviors? Previous work has directly defined an independent model parameter to capture the interpersonal influence between each pair of users. To do so, these models need a parameter for each pair of users, which results in high-dimensional models becoming easily trapped into the overfitting problem. However, such models do not consider how influences depend on each other if influences are sent from the same user or if influences are received by the same user. Therefore, we propose a model that defines parameters for every user with a latent influence vector and a susceptibility vector, opposite to define influences on user pairs. Such low-dimensional representations naturally cause the interpersonal influences involving the same user to be coupled with each other, thus reducing the model's complexity. Additionally, the model can easily consider the temporal information and sentimental polarities of users' messages. 
Finally, we conduct extensive experiments on two real-world Microblog datasets, showing that our model with such representations achieves best performance on three prediction tasks, compared to the state-of-the-art and pair-wise baselines.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "57", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wu:2019:HUI, author = "Jimmy Ming-Tai Wu and Jerry Chun-Wei Lin and Ashish Tamrakar", title = "High-Utility Itemset Mining with Effective Pruning Strategies", journal = j-TKDD, volume = "13", number = "6", pages = "58:1--58:??", month = dec, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3363571", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:03 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3363571", abstract = "High-utility itemset mining is a popular data mining problem that considers utility factors, such as quantity and unit profit of items besides frequency measure from the transactional database. It helps to find the most valuable and profitable products/items that are difficult to track by using only the frequent itemsets. An item might have a high-profit value which is rare in the transactional database and has a tremendous importance. While there are many existing algorithms to find high-utility itemsets (HUIs) that generate comparatively large candidate sets, our main focus is on significantly reducing the computation time with the introduction of new pruning strategies. The designed pruning strategies help to reduce the visitation of unnecessary nodes in the search space, which reduces the time required by the algorithm. 
In this article, two new stricter upper bounds are designed to reduce the computation time by refraining from visiting unnecessary nodes of an itemset. Thus, the search space of the potential HUIs can be greatly reduced, and the mining procedure of the execution time can be improved. The proposed strategies can also significantly minimize the transaction database generated on each node. Experimental results showed that the designed algorithm with two pruning strategies outperform the state-of-the-art algorithms for mining the required HUIs in terms of runtime and number of revised candidates. The memory usage of the designed algorithm also outperforms the state-of-the-art approach. Moreover, a multi-thread concept is also discussed to further handle the problem of big datasets.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "58", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Henzgen:2019:MRD, author = "Sascha Henzgen and Eyke H{\"u}llermeier", title = "Mining Rank Data", journal = j-TKDD, volume = "13", number = "6", pages = "59:1--59:??", month = dec, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3363572", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:03 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3363572", abstract = "The problem of frequent pattern mining has been studied quite extensively for various types of data, including sets, sequences, and graphs. Somewhat surprisingly, another important type of data, namely rank data, has received very little attention in data mining so far. In this article, we therefore address the problem of mining rank data, that is, data in the form of rankings (total orders) of an underlying set of items. 
More specifically, two types of patterns are considered, namely frequent rankings and dependencies between such rankings in the form of association rules. Algorithms for mining frequent rankings and frequent closed rankings are proposed and tested experimentally, using both synthetic and real data.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "59", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Roseberry:2019:MLP, author = "Martha Roseberry and Bartosz Krawczyk and Alberto Cano", title = "Multi-Label Punitive {kNN} with Self-Adjusting Memory for Drifting Data Streams", journal = j-TKDD, volume = "13", number = "6", pages = "60:1--60:??", month = dec, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3363573", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:03 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3363573", abstract = "In multi-label learning, data may simultaneously belong to more than one class. When multi-label data arrives as a stream, the challenges associated with multi-label learning are joined by those of data stream mining, including the need for algorithms that are fast and flexible, able to match both the speed and evolving nature of the stream. This article presents a punitive $k$ nearest neighbors algorithm with a self-adjusting memory (MLSAMPkNN) for multi-label, drifting data streams. The memory adjusts in size to contain only the current concept and a novel punitive system identifies and penalizes errant data examples early, removing them from the window. By retaining and using only data that are both current and beneficial, MLSAMPkNN is able to adapt quickly and efficiently to changes within the data stream while still maintaining a low computational complexity. 
Additionally, the punitive removal mechanism offers increased robustness to various data-level difficulties present in data streams, such as class imbalance and noise. The experimental study compares the proposal to 24 algorithms using 30 real-world and 15 artificial multi-label data streams on six multi-label metrics, evaluation time, and memory consumption. The superior performance of the proposed method is validated through non-parametric statistical analysis, proving both high accuracy and low time complexity. MLSAMPkNN is a versatile classifier, capable of returning excellent performance in diverse stream scenarios.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "60", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Lei:2019:IRU, author = "Yu Lei and Wenjie Li", title = "Interactive Recommendation with User-Specific Deep Reinforcement Learning", journal = j-TKDD, volume = "13", number = "6", pages = "61:1--61:??", month = dec, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3359554", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:03 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3359554", abstract = "In this article, we study a multi-step interactive recommendation problem for explicit-feedback recommender systems. Different from the existing works, we propose a novel user-specific deep reinforcement learning approach to the problem. Specifically, we first formulate the problem of interactive recommendation for each target user as a Markov decision process (MDP). We then derive a multi-MDP reinforcement learning task for all involved users. 
To model the possible relationships (including similarities and differences) between different users' MDPs, we construct user-specific latent states by using matrix factorization. After that, we propose a user-specific deep Q-learning (UDQN) method to estimate optimal policies based on the constructed user-specific latent states. Furthermore, we propose Biased UDQN (BUDQN) to explicitly model user-specific information by employing an additional bias parameter when estimating the Q-values for different users. Finally, we validate the effectiveness of our approach by comprehensive experimental results and analysis.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "61", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Lee:2019:AMG, author = "John Boaz Lee and Ryan A. Rossi and Sungchul Kim and Nesreen K. Ahmed and Eunyee Koh", title = "Attention Models in Graphs: a Survey", journal = j-TKDD, volume = "13", number = "6", pages = "62:1--62:??", month = dec, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3363574", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:03 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3363574", abstract = "Graph-structured data arise naturally in many different application domains. By representing data as graphs, we can capture entities (i.e., nodes) as well as their relationships (i.e., edges) with each other. Many useful insights can be derived from graph-structured data as demonstrated by an ever-growing body of work focused on graph mining. However, in the real-world, graphs can be both large---with many complex patterns---and noisy, which can pose a problem for effective graph mining. An effective way to deal with this issue is to incorporate ``attention'' into graph mining solutions.
An attention mechanism allows a method to focus on task-relevant parts of the graph, helping it to make better decisions. In this work, we conduct a comprehensive and focused survey of the literature on the emerging field of graph attention models. We introduce three intuitive taxonomies to group existing work. These are based on problem setting (type of input and output), the type of attention mechanism used, and the task (e.g., graph classification, link prediction). We motivate our taxonomies through detailed examples and use each to survey competing approaches from a unique standpoint. Finally, we highlight several challenges in the area and discuss promising directions for future work.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "62", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yang:2019:PCM, author = "Wangdong Yang and Kenli Li and Keqin Li", title = "A Pipeline Computing Method of {SpTV} for Three-Order Tensors on {CPU} and {GPU}", journal = j-TKDD, volume = "13", number = "6", pages = "63:1--63:??", month = dec, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3363575", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:03 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3363575", abstract = "Tensors have drawn a growing attention in many applications, such as physics, engineering science, social networks, recommended systems. Tensor decomposition is the key to explore the inherent intrinsic data relationship of tensor. There are many sparse tensor and vector multiplications (SpTV) in tensor decomposition. We analyze a variety of storage formats of sparse tensors and develop a piecewise compression strategy to improve the storage efficiency of large sparse tensors. 
This compression strategy can avoid storing a large number of empty slices and empty fibers in sparse tensors, and thus the storage space is significantly reduced. A parallel algorithm for the SpTV based on the high-order compressed format based on slices is designed to greatly improve its computing performance on graphics processing unit. Each tensor is cut into multiple slices to form a series of sparse matrix and vector multiplications, which form the pipelined parallelism. The transmission time of the slices can be hidden through pipelined parallel to further optimize the performance of the SpTV.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "63", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhou:2019:RMS, author = "Yu Zhou and Jianbin Huang and Heli Sun and Yizhou Sun and Shaojie Qiao and Stephen Wambura", title = "Recurrent Meta-Structure for Robust Similarity Measure in Heterogeneous Information Networks", journal = j-TKDD, volume = "13", number = "6", pages = "64:1--64:??", month = dec, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3364226", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:03 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3364226", abstract = "Similarity measure is one of the fundamental task in heterogeneous information network (HIN) analysis. It has been applied to many areas, such as product recommendation, clustering, and Web search. Most of the existing metrics can provide personalized services for users by taking a meta-path or meta-structure as input. However, these metrics may highly depend on the user-specified meta-path or meta-structure. In addition, users must know how to select an appropriate meta-path or meta-structure. 
In this article, we propose a novel similarity measure in HINs, called Recurrent Meta-Structure (RecurMS)-based Similarity (RMSS). The RecurMS as a schematic structure in HINs provides a unified framework for integrating all of the meta-paths and meta-structures, and can be constructed automatically by means of repetitively traversing the network schema. In order to formalize the semantics, the RecurMS is decomposed into several recurrent meta-paths and recurrent meta-trees, and we then define the commuting matrices of the recurrent meta-paths and meta-trees. All of these commuting matrices are combined together according to different weights. We propose two kinds of weighting strategies to determine the weights. The first is called the local weighting strategy that depends on the sparsity of the commuting matrices, and the second is called the global weighting strategy that depends on the strength of the commuting matrices. As a result, RMSS is defined by means of the weighted summation of the commuting matrices. Note that RMSS can also provide personalized services for users by means of the weights of the recurrent meta-paths and meta-trees. Experimental evaluations show that the proposed RMSS is robust and outperforms the existing metrics in terms of ranking and clustering task.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "64", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Veloso:2019:SSM, author = "Br{\'a}ulio M. Veloso and Renato M. Assun{\c{c}}{\~a}o and Anderson A. 
Ferreira and Nivio Ziviani", title = "In Search of a Stochastic Model for the E-News Reader", journal = j-TKDD, volume = "13", number = "6", pages = "65:1--65:??", month = dec, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3362695", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Dec 18 14:31:03 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3362695", abstract = "E-news readers have increasingly at their disposal a broad set of news articles to read. Online newspaper sites use recommender systems to predict and to offer relevant articles to their users. Typically, these recommender systems do not leverage users' reading behavior. If we know how the topics-reads change in a reading session, we may lead to fine-tuned recommendations, for example, after reading a certain number of sports items, it may be counter-productive to keep recommending other sports news. The motivation for this article is the assumption that understanding user behavior when reading successive online news articles can help in developing better recommender systems. We propose five categories of stochastic models to describe this behavior depending on how the previous reading history affects the future choices of topics. We instantiated these five classes with many different stochastic processes covering short-term memory, revealed-preference, cumulative advantage, and geometric sojourn models. Our empirical study is based on large datasets of E-news from two online newspapers. We collected data from more than 13 million users who generated more than 23 million reading sessions, each one composed by the successive clicks of the users on the posted news. We reduce each user session to the sequence of reading news topics. The models were fitted and compared using the Akaike Information Criterion and the Brier Score. 
We found that the best models are those in which the user moves through topics influenced only by their most recent readings. Our models were also better to predict the next reading than the recommender systems currently used in these journals showing that our models can improve user satisfaction.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "65", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Hasan:2020:NSA, author = "Md Kamrul Hasan and Christopher Pal", title = "A New Smooth Approximation to the Zero One Loss with a Probabilistic Interpretation", journal = j-TKDD, volume = "14", number = "1", pages = "1:1--1:28", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3365672", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Feb 6 07:36:59 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3365672", abstract = "We examine a new form of smooth approximation to the zero one loss in which learning is performed using a reformulation of the widely used logistic function. Our approach is based on using the posterior mean of a novel generalized Beta-Bernoulli \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "1", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Mitra:2020:UMV, author = "Sayantan Mitra and Mohammed Hasanuzzaman and Sriparna Saha", title = "A Unified Multi-view Clustering Algorithm Using Multi-objective Optimization Coupled with Generative Model", journal = j-TKDD, volume = "14", number = "1", pages = "2:1--2:31", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3365673", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Feb 6 07:36:59 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3365673", abstract = "There is a large body of works on multi-view clustering that exploit multiple representations (or views) of the same input data for better convergence. These multiple views can come from multiple modalities (image, audio, text) or different feature \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "2", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ni:2020:LOC, author = "Li Ni and Wenjian Luo and Wenjie Zhu and Bei Hua", title = "Local Overlapping Community Detection", journal = j-TKDD, volume = "14", number = "1", pages = "3:1--3:25", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3361739", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Feb 6 07:36:59 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3361739", abstract = "Local community detection refers to finding the community that contains the given node based on local information, which becomes very meaningful when global information about the network is unavailable or expensive to acquire. 
Most studies on local \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "3", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Angiulli:2020:CCF, author = "Fabrizio Angiulli", title = "{CFOF}: a Concentration Free Measure for Anomaly Detection", journal = j-TKDD, volume = "14", number = "1", pages = "4:1--4:53", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3362158", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Feb 6 07:36:59 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3362158", abstract = "We present a novel notion of outlier, called the Concentration Free Outlier Factor, or CFOF. As a main contribution, we formalize the notion of concentration of outlier scores and theoretically prove that CFOF does not concentrate in the Euclidean space \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "4", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Arifuzzaman:2020:FPA, author = "Shaikh Arifuzzaman and Maleq Khan and Madhav Marathe", title = "Fast Parallel Algorithms for Counting and Listing Triangles in Big Graphs", journal = j-TKDD, volume = "14", number = "1", pages = "5:1--5:34", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3365676", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Feb 6 07:36:59 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3365676", abstract = "Big graphs (networks) arising in numerous application areas pose significant challenges for graph analysts as these graphs grow to billions of nodes and edges and are prohibitively large to fit in the main memory. Finding the number of triangles in a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "5", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Kuang:2020:TEE, author = "Kun Kuang and Peng Cui and Bo Li and Meng Jiang and Yashen Wang and Fei Wu and Shiqiang Yang", title = "Treatment Effect Estimation via Differentiated Confounder Balancing and Regression", journal = j-TKDD, volume = "14", number = "1", pages = "6:1--6:25", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3365677", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Feb 6 07:36:59 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3365677", abstract = "Treatment effect plays an important role on decision making in many fields, such as social marketing, healthcare, and public policy. 
The key challenge on estimating treatment effect in the wild observational studies is to handle confounding bias induced \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "6", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Jan:2020:ECC, author = "Zohaib Md. Jan and Brijesh Verma", title = "Evolutionary Classifier and Cluster Selection Approach for Ensemble Classification", journal = j-TKDD, volume = "14", number = "1", pages = "7:1--7:18", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3366633", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Feb 6 07:36:59 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3366633", abstract = "Ensemble classifiers improve the classification performance by combining several classifiers using a suitable fusion methodology. Many ensemble classifier generation methods have been developed that allowed the training of multiple classifiers on a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "7", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Queiroz-Sousa:2020:ROT, author = "Paulo Orlando Queiroz-Sousa and Ana Carolina Salgado", title = "A Review on {OLAP} Technologies Applied to Information Networks", journal = j-TKDD, volume = "14", number = "1", pages = "8:1--8:25", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3370912", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Feb 6 07:36:59 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3370912", abstract = "Many real systems produce network data or highly interconnected data, which can be called information networks. These information networks form a critical component in modern information infrastructure, constituting a large graph data volume. The \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "8", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Nie:2020:ALL, author = "Feiping Nie and Zheng Wang and Rong Wang and Zhen Wang and Xuelong Li", title = "Adaptive Local Linear Discriminant Analysis", journal = j-TKDD, volume = "14", number = "1", pages = "9:1--9:19", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3369870", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Feb 6 07:36:59 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3369870", abstract = "Dimensionality reduction plays a significant role in high-dimensional data processing, and Linear Discriminant Analysis (LDA) is a widely used supervised dimensionality reduction approach. 
However, a major drawback of LDA is that it is incapable of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "9", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Lu:2020:ILS, author = "Xinjiang Lu and Zhiwen Yu and Chuanren Liu and Yanchi Liu and Hui Xiong and Bin Guo", title = "Inferring Lifetime Status of Point-of-Interest: a Multitask Multiclass Approach", journal = j-TKDD, volume = "14", number = "1", pages = "10:1--10:27", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3369799", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Feb 6 07:36:59 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3369799", abstract = "A Point-of-Interest (POI) refers to a specific location that people may find useful or interesting. In modern cities, a large number of POIs emerge, grow, stabilize for a period, then finally disappear. The stages (e.g., emerge and grow) in this process \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "10", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Galimberti:2020:CDM, author = "Edoardo Galimberti and Francesco Bonchi and Francesco Gullo and Tommaso Lanciano", title = "Core Decomposition in Multilayer Networks: Theory, Algorithms, and Applications", journal = j-TKDD, volume = "14", number = "1", pages = "11:1--11:40", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3369872", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Feb 6 07:36:59 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3369872", abstract = "Multilayer networks are a powerful paradigm to model complex systems, where multiple relations occur between the same entities. Despite the keen interest in a variety of tasks, algorithms, and analyses in this type of network, the problem of extracting \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "11", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Shin:2020:FAP, author = "Kijung Shin and Sejoon Oh and Jisu Kim and Bryan Hooi and Christos Faloutsos", title = "Fast, Accurate and Provable Triangle Counting in Fully Dynamic Graph Streams", journal = j-TKDD, volume = "14", number = "2", pages = "12:1--12:39", month = mar, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3375392", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Mar 10 08:50:37 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3375392", abstract = "Given a stream of edge additions and deletions, how can we estimate the count of triangles in it? 
If we can store only a subset of the edges, how can we obtain unbiased estimates with small variances? Counting triangles (i.e., cliques of size three) in \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "12", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Han:2020:GLS, author = "Huimei Han and Xingquan Zhu and Ying Li", title = "Generalizing Long Short-Term Memory Network for Deep Learning from Generic Data", journal = j-TKDD, volume = "14", number = "2", pages = "13:1--13:28", month = mar, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3366022", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Mar 10 08:50:37 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3366022", abstract = "Long Short-Term Memory (LSTM) network, a popular deep-learning model, is particularly useful for data with temporal correlation, such as texts, sequences, or time series data, thanks to its well-sought after recurrent network structures designed to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "13", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Lin:2020:BCR, author = "Chi-Chun Lin and Kun-Ta Chuang and Wush Chi-Hsuan Wu and Ming-Syan Chen", title = "Budget-Constrained Real-Time Bidding Optimization: Multiple Predictors Make It Better", journal = j-TKDD, volume = "14", number = "2", pages = "14:1--14:27", month = mar, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3375393", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Mar 10 08:50:37 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3375393", abstract = "In this article, we pursue a better solution for the promising problem, i.e., the bidding strategy design, in the real-time bidding (RTB) advertising (AD) environment. Under the budget constraint, the design of an optimal strategy for bidding on each \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "14", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yan:2020:MTI, author = "Xiaoqiang Yan and Zhengzheng Lou and Shizhe Hu and Yangdong Ye", title = "Multi-task Information Bottleneck Co-clustering for Unsupervised Cross-view Human Action Categorization", journal = j-TKDD, volume = "14", number = "2", pages = "15:1--15:23", month = mar, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3375394", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Mar 10 08:50:37 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3375394", abstract = "The widespread adoption of low-cost cameras generates massive amounts of videos recorded from different viewpoints every day. 
To cope with this vast amount of unlabeled and heterogeneous data, a new multi-task information bottleneck co-clustering (MIBC) \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "15", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Lin:2020:BRP, author = "Bo Lin and Wei Luo and Zhiling Luo and Bo Wang and Shuiguang Deng and Jianwei Yin and Mengchu Zhou", title = "Bradykinesia Recognition in {Parkinson}'s Disease via Single {RGB} Video", journal = j-TKDD, volume = "14", number = "2", pages = "16:1--16:19", month = mar, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3369438", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Mar 10 08:50:37 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3369438", abstract = "Parkinson's disease is a progressive nervous system disorder afflicting millions of patients. Among its motor symptoms, bradykinesia is one of the cardinal manifestations. Experienced doctors are required for the clinical diagnosis of bradykinesia, but \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "16", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2020:RTT, author = "Shuai Liu and Guojie Song and Wenhao Huang", title = "Real-time Transportation Prediction Correction using Reconstruction Error in Deep Learning", journal = j-TKDD, volume = "14", number = "2", pages = "17:1--17:20", month = mar, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3369871", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Mar 10 08:50:37 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3369871", abstract = "In online complex systems such as transportation system, an important work is real-time traffic prediction. Due to the data shift, data model inconsistency, and sudden change of traffic patterns (like transportation accident), the prediction result \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "17", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Dai:2020:CVE, author = "Chenglong Dai and Dechang Pi and Stefanie I. 
Becker and Jia Wu and Lin Cui and Blake Johnson", title = "{CenEEGs}: Valid {EEG} Selection for Classification", journal = j-TKDD, volume = "14", number = "2", pages = "18:1--18:25", month = mar, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3371153", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Mar 10 08:50:37 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3371153", abstract = "This article explores valid brain electroencephalography (EEG) selection for EEG classification with different classifiers, which has been rarely addressed in previous studies and is mostly ignored by existing EEG processing methods and applications. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "18", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Dong:2020:RCP, author = "Jialin Dong and Kai Yang and Yuanming Shi", title = "Ranking from Crowdsourced Pairwise Comparisons via Smoothed {Riemannian} Optimization", journal = j-TKDD, volume = "14", number = "2", pages = "19:1--19:26", month = mar, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3372407", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Mar 10 08:50:37 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3372407", abstract = "Social Internet of Things has recently become a promising paradigm for augmenting the capability of humans and devices connected in the networks to provide services. In social Internet of Things network, crowdsourcing that collects the intelligence of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "19", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Xu:2020:ANA, author = "Yanan Xu and Yanyan Shen and Yanmin Zhu and Jiadi Yu", title = "{AR$^2$Net}: an Attentive Neural Approach for Business Location Selection with Satellite Data and Urban Data", journal = j-TKDD, volume = "14", number = "2", pages = "20:1--20:28", month = mar, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3372406", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Mar 10 08:50:37 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3372406", abstract = "Business location selection is crucial to the success of businesses. Traditional approaches like manual survey investigate multiple factors, such as foot traffic, neighborhood structure, and available workforce, which are typically hard to measure. In \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "20", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Concas:2020:MSM, author = "Francesco Concas and Pengfei Xu and Mohammad A. Hoque and Jiaheng Lu and Sasu Tarkoma", title = "Multiple Set Matching with {Bloom} Matrix and {Bloom} Vector", journal = j-TKDD, volume = "14", number = "2", pages = "21:1--21:21", month = mar, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3372409", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Mar 10 08:50:37 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3372409", abstract = "Bloom Filter is a space-efficient probabilistic data structure for checking the membership of elements in a set. 
Given multiple sets, a standard Bloom Filter is not sufficient when looking for the items to which an element or a set of input elements \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "21", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2020:CDM, author = "Pei-Zhen Li and Ling Huang and Chang-Dong Wang and Jian-Huang Lai and Dong Huang", title = "Community Detection by Motif-Aware Label Propagation", journal = j-TKDD, volume = "14", number = "2", pages = "22:1--22:19", month = mar, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3378537", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Mar 10 08:50:37 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3378537", abstract = "Community detection (or graph clustering) is crucial for unraveling the structural properties of complex networks. As an important technique in community detection, label propagation has shown the advantage of finding a good community structure with \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "22", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Guo:2020:NAF, author = "Yuan Guo and Yu Sun and Kai Wu and Kerong Jiang", title = "New Algorithms of Feature Selection and Big Data Assignment for {CBR} System Integrated by {Bayesian} Network", journal = j-TKDD, volume = "14", number = "2", pages = "23:1--23:20", month = mar, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3373086", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Mar 10 08:50:37 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3373086", abstract = "Under big data, the integrated system of case-based reasoning and Bayesian network has exhibited great advantage in implementing the intelligence of engineering application in many domains. To further improve the performance of the hybrid system, this \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "23", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Hua:2020:PTM, author = "Ting Hua and Chang-Tien Lu and Jaegul Choo and Chandan K. Reddy", title = "Probabilistic Topic Modeling for Comparative Analysis of Document Collections", journal = j-TKDD, volume = "14", number = "2", pages = "24:1--24:27", month = mar, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3369873", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Mar 10 08:50:37 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3369873", abstract = "Probabilistic topic models, which can discover hidden patterns in documents, have been extensively studied. 
However, rather than learning from a single document collection, numerous real-world applications demand a comprehensive understanding of the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "24", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Lee:2020:LDS, author = "Kwang Hee Lee and Myoung Ho Kim", title = "Linearization of Dependency and Sampling for Participation-based Betweenness Centrality in Very Large {$B$}-hypergraphs", journal = j-TKDD, volume = "14", number = "3", pages = "25:1--25:41", month = may, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3375399", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue May 19 09:32:05 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3375399", abstract = "A B-hypergraph consisting of nodes and directed hyperedges is a generalization of the directed graph. A directed hyperedge in the B-hypergraph represents a relation from a set of source nodes to a single destination node. We suggest one possible \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "25", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Bian:2020:MSM, author = "Jiang Bian and Haoyi Xiong and Yanjie Fu and Jun Huan and Zhishan Guo", title = "{MP$^2$SDA}: Multi-Party Parallelized Sparse Discriminant Learning", journal = j-TKDD, volume = "14", number = "3", pages = "26:1--26:22", month = may, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3374919", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue May 19 09:32:05 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3374919", abstract = "Sparse Discriminant Analysis (SDA) has been widely used to improve the performance of classical Fisher's Linear Discriminant Analysis in supervised metric learning, feature selection, and classification. With the increasing needs of distributed data \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "26", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", }

@Article{Tang:2020:ERF,
  author =       "Lei Tang and Zihang Liu and Yaling Zhao and Zongtao
                 Duan and Jingchi Jia",
  title =        "Efficient Ridesharing Framework for Ride-matching via
                 Heterogeneous Network Embedding",
  journal =      j-TKDD,
  volume =       "14",
  number =       "3",
  pages =        "27:1--27:24",
  month =        may,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3373839",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue May 19 09:32:05 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3373839",
  abstract =     "Ridesharing has attracted increasing attention in
                 recent years, and combines the flexibility and speed of
                 private cars with the reduced cost of fixed-line
                 systems to benefit alleviating traffic pressure. A
                 major issue in ridesharing is the accurate \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "27",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Ermis:2020:DSD,
  author =       "Beyza Ermis and A. Taylan Cemgil",
  title =        "Data Sharing via Differentially Private Coupled Matrix
                 Factorization",
  journal =      j-TKDD,
  volume =       "14",
  number =       "3",
  pages =        "28:1--28:27",
  month =        may,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3372408",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue May 19 09:32:05 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3372408",
  abstract =     "We address the privacy-preserving data-sharing problem
                 in a distributed multiparty setting. In this setting,
                 each data site owns a distinct part of a dataset and
                 the aim is to estimate the parameters of a statistical
                 model conditioned on the complete \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "28",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Yang:2020:CIM,
  author =       "Yu Yang and Xiangbo Mao and Jian Pei and Xiaofei He",
  title =        "Continuous Influence Maximization",
  journal =      j-TKDD,
  volume =       "14",
  number =       "3",
  pages =        "29:1--29:38",
  month =        may,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3380928",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue May 19 09:32:05 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3380928",
  abstract =     "Imagine we are introducing a new product through a
                 social network, where we know for each user in the
                 network the function of purchase probability with
                 respect to discount. Then, what discounts should we
                 offer to those social network users so that, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "29",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Ostovar:2020:RDC, author = "Alireza Ostovar and Sander J. J.
Leemans and Marcello {La Rosa}", title = "Robust Drift Characterization from Event Streams of Business Processes", journal = j-TKDD, volume = "14", number = "3", pages = "30:1--30:57", month = may, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3375398", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue May 19 09:32:05 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3375398", abstract = "Process workers may vary the normal execution of a business process to adjust to changes in their operational environment, e.g., changes in workload, season, or regulations. Changes may be simple, such as skipping an individual activity, or complex, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "30", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2020:SFE, author = "Bang Liu and Fred X. Han and Di Niu and Linglong Kong and Kunfeng Lai and Yu Xu", title = "{Story Forest}: Extracting Events and Telling Stories from Breaking News", journal = j-TKDD, volume = "14", number = "3", pages = "31:1--31:28", month = may, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3377939", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue May 19 09:32:05 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3377939", abstract = "Extracting events accurately from vast news corpora and organize events logically is critical for news apps and search engines, which aim to organize news information collected from the Internet and present it to users in the most sensible forms. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "31", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Akhtar:2020:DMT, author = "Md Shad Akhtar and Dushyant Singh Chauhan and Asif Ekbal", title = "A Deep Multi-task Contextual Attention Framework for Multi-modal Affect Analysis", journal = j-TKDD, volume = "14", number = "3", pages = "32:1--32:27", month = may, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3380744", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue May 19 09:32:05 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3380744", abstract = "Multi-modal affect analysis (e.g., sentiment and emotion analysis) is an interdisciplinary study and has been an emerging and prominent field in Natural Language Processing and Computer Vision. The effective fusion of multiple modalities (e.g., text, \ldots{}).", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "32", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Abd-Elaziz:2020:EDM, author = "M. M. Abd-Elaziz and Hazem M. El-Bakry and Ahmed Abou Elfetouh and Amira Elzeiny", title = "Enhanced Data Mining Technique to Measure Satisfaction Degree of Social Media Users of {Xeljanz} Drug", journal = j-TKDD, volume = "14", number = "3", pages = "33:1--33:13", month = may, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3389433", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue May 19 09:32:05 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3389433", abstract = "In the recent times, social media has become important in the field of health care as a major resource of valuable health information. 
Social media can provide massive amounts of data in real-time through user interaction, and this data can be analysed \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "33", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", }

@Article{Tuomo:2020:BCC,
  author =       "Tuomo Alasalmi and Jaakko Suutala and Juha R{\"o}ning
                 and Heli Koskim{\"a}ki",
  title =        "Better Classifier Calibration for Small Datasets",
  journal =      j-TKDD,
  volume =       "14",
  number =       "3",
  pages =        "34:1--34:19",
  month =        may,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3385656",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue May 19 09:32:05 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3385656",
  abstract =     "Classifier calibration does not always go hand in hand
                 with the classifier's ability to separate the classes.
                 There are applications where good classifier
                 calibration, i.e., the ability to produce accurate
                 probability estimates, is more important than
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "34",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Amornbunchornvej:2020:FIF, author = "Chainarong Amornbunchornvej and Tanya Berger-Wolf", title = "Framework for Inferring Following Strategies from Time Series of Movement Data", journal = j-TKDD, volume = "14", number = "3", pages = "35:1--35:22", month = may, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3385730", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue May 19 09:32:05 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3385730", abstract = "How do groups of individuals achieve consensus in movement decisions? Do individuals follow their friends, the one predetermined leader, or whomever just happens to be nearby? To address these questions computationally, we formalize Coordination S. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov.
Data", articleno = "35", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Sun:2020:NEC, author = "Heli Sun and Fang He and Jianbin Huang and Yizhou Sun and Yang Li and Chenyu Wang and Liang He and Zhongbin Sun and Xiaolin Jia", title = "Network Embedding for Community Detection in Attributed Networks", journal = j-TKDD, volume = "14", number = "3", pages = "36:1--36:25", month = may, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3385415", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue May 19 09:32:05 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3385415", abstract = "Community detection aims to partition network nodes into a set of clusters, such that nodes are more densely connected to each other within the same cluster than other clusters. For attributed networks, apart from the denseness requirement of topology \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "36", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Lappas:2020:MCP, author = "Theodoros Lappas", title = "Mining Career Paths from Large Resume Databases: Evidence from {IT} Professionals", journal = j-TKDD, volume = "14", number = "3", pages = "37:1--37:38", month = may, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3379984", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue May 19 09:32:05 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3379984", abstract = "The emergence of online professional platforms, such as LinkedIn and Indeed, has led to unprecedented volumes of rich resume data that have revolutionized the study of careers. 
One of the most prevalent problems in this space is the extraction of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "37", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", }

@Article{Zhang:2020:INA,
  author =       "Si Zhang and Hanghang Tong and Jie Tang and Jiejun Xu
                 and Wei Fan",
  title =        "Incomplete Network Alignment: Problem Definitions and
                 Fast Solutions",
  journal =      j-TKDD,
  volume =       "14",
  number =       "4",
  pages =        "38:1--38:26",
  month =        jul,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3384203",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Jul 10 13:39:39 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3384203",
  abstract =     "Networks are prevalent in many areas and are often
                 collected from multiple sources. However, due to the
                 veracity characteristics, more often than not, networks
                 are incomplete. Network alignment and network
                 completion have become two fundamental \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "38",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Sun:2020:FDA,
  author =       "Bintao Sun and T.-H. Hubert Chan and Mauro Sozio",
  title =        "Fully Dynamic Approximate {$k$}-Core Decomposition in
                 Hypergraphs",
  journal =      j-TKDD,
  volume =       "14",
  number =       "4",
  pages =        "39:1--39:21",
  month =        jul,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3385416",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Jul 10 13:39:39 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3385416",
  abstract =     "In this article, we design algorithms to maintain
                 approximate core values in dynamic hypergraphs. This
                 notion has been well studied for normal graphs in both
                 static and dynamic setting. We generalize the problem
                 to hypergraphs when edges can be inserted \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "39",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Balasubramaniam:2020:ENT, author = "Thirunavukarasu Balasubramaniam and Richi Nayak and Chau Yuen", title = "Efficient Nonnegative Tensor Factorization via Saturating Coordinate Descent", journal = j-TKDD, volume = "14", number = "4", pages = "40:1--40:28", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3385654", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Jul 10 13:39:39 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3385654", abstract = "With the advancements in computing technology and web-based applications, data are increasingly generated in multi-dimensional form. These data are usually sparse due to the presence of a large number of users and fewer user interactions. To deal with \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov.
Data", articleno = "40", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Kong:2020:GSS, author = "Xiangjie Kong and Jun Zhang and Da Zhang and Yi Bu and Ying Ding and Feng Xia", title = "The Gene of Scientific Success", journal = j-TKDD, volume = "14", number = "4", pages = "41:1--41:19", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3385530", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Jul 10 13:39:39 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3385530", abstract = "This article elaborates how to identify and evaluate causal factors to improve scientific impact. Currently, analyzing scientific impact can be beneficial to various academic activities including funding application, mentor recommendation, discovering \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "41", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chen:2020:CTF, author = "Cen Chen and Kenli Li and Sin G. 
Teo and Xiaofeng Zou and Keqin Li and Zeng Zeng", title = "Citywide Traffic Flow Prediction Based on Multiple Gated Spatio-temporal Convolutional Neural Networks", journal = j-TKDD, volume = "14", number = "4", pages = "42:1--42:23", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3385414", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Jul 10 13:39:39 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3385414", abstract = "Traffic flow prediction is crucial for public safety and traffic management, and remains a big challenge because of many complicated factors, e.g., multiple spatio-temporal dependencies, holidays, and weather. Some work leveraged 2D convolutional neural \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "42", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhu:2020:SCM, author = "Tianyu Zhu and Guannan Liu and Guoqing Chen", title = "Social Collaborative Mutual Learning for Item Recommendation", journal = j-TKDD, volume = "14", number = "4", pages = "43:1--43:19", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3387162", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Jul 10 13:39:39 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3387162", abstract = "Recommender Systems (RSs) provide users with item choices based on their preferences reflected in past interactions and become important tools to alleviate the information overload problem for users. However, in real-world scenarios, the user-item \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "43", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", }

@Article{Constantinou:2020:LBN,
  author =       "Anthony C. Constantinou",
  title =        "Learning {Bayesian} Networks with the {Saiyan}
                 Algorithm",
  journal =      j-TKDD,
  volume =       "14",
  number =       "4",
  pages =        "44:1--44:21",
  month =        jul,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3385655",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Jul 10 13:39:39 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3385655",
  abstract =     "Some structure learning algorithms have proven to be
                 effective in reconstructing hypothetical Bayesian
                 Network graphs from synthetic data. However, in their
                 mission to maximise a scoring function, many become
                 conservative and minimise edges discovered. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "44",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wang:2020:EEB, author = "Changping Wang and Chaokun Wang and Zheng Wang and Xiaojun Ye and Philip S. Yu", title = "{Edge2vec}: Edge-based Social Network Embedding", journal = j-TKDD, volume = "14", number = "4", pages = "45:1--45:24", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3391298", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Jul 10 13:39:39 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3391298", abstract = "Graph embedding, also known as network embedding and network representation learning, is a useful technique which helps researchers analyze information networks through embedding a network into a low-dimensional space.
However, existing graph embedding \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "45", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhu:2020:SGC, author = "Xiaofeng Zhu and Shichao Zhang and Jilian Zhang and Yonggang Li and Guangquan Lu and Yang Yang", title = "Sparse Graph Connectivity for Image Segmentation", journal = j-TKDD, volume = "14", number = "4", pages = "46:1--46:19", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3397188", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Jul 10 13:39:39 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3397188", abstract = "It has been demonstrated that the segmentation performance is highly dependent on both subspace preservation and graph connectivity. In the literature, the full connectivity method linearly represents each data point (e.g., a pixel in one image) by all \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "46", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Marques:2020:IEU, author = "Henrique O. Marques and Ricardo J. G. B. 
Campello and J{\"u}rg Sander and Arthur Zimek", title = "Internal Evaluation of Unsupervised Outlier Detection", journal = j-TKDD, volume = "14", number = "4", pages = "47:1--47:42", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3394053", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Jul 10 13:39:39 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3394053", abstract = "Although there is a large and growing literature that tackles the unsupervised outlier detection problem, the unsupervised evaluation of outlier detection results is still virtually untouched in the literature. The so-called internal evaluation, based \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "47", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhu:2020:SWM, author = "Xiaofeng Zhu and Shichao Zhang and Yonghua Zhu and Wei Zheng and Yang Yang", title = "Self-weighted Multi-view Fuzzy Clustering", journal = j-TKDD, volume = "14", number = "4", pages = "48:1--48:17", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3396238", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Jul 10 13:39:39 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3396238", abstract = "Since the data in each view may contain distinct information different from other views as well as has common information for all views in multi-view learning, many multi-view clustering methods have been designed to use these information (including the \ldots{}).", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "48", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Das:2020:DAI, author = "Shubhomoy Das and Weng-Keen Wong and Thomas Dietterich and Alan Fern and Andrew Emmott", title = "Discovering Anomalies by Incorporating Feedback from an Expert", journal = j-TKDD, volume = "14", number = "4", pages = "49:1--49:32", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3396608", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Jul 10 13:39:39 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3396608", abstract = "Unsupervised anomaly detection algorithms search for outliers and then predict that these outliers are the anomalies. When deployed, however, these algorithms are often criticized for high false-positive and high false-negative rates. One main cause of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "49", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Xu:2020:NSR, author = "Yuanbo Xu and Yongjian Yang and En Wang and Jiayu Han and Fuzhen Zhuang and Zhiwen Yu and Hui Xiong", title = "Neural Serendipity Recommendation: Exploring the Balance between Accuracy and Novelty with Sparse Explicit Feedback", journal = j-TKDD, volume = "14", number = "4", pages = "50:1--50:25", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3396607", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Jul 10 13:39:39 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3396607", abstract = "Recommender systems have been playing an important role in providing personalized information to users. 
However, there is always a trade-off between accuracy and novelty in recommender systems. Usually, many users are suffering from redundant or \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "50", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Xiong:2020:ISI, author = "Hui Xiong and Chih-Jen Lin", title = "Introduction to the Special Issue on the Best Papers from {KDD 2018}", journal = j-TKDD, volume = "14", number = "5", pages = "51e:1--51e:2", month = aug, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3407901", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Aug 28 11:59:01 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3407901", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "51e", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2020:TOO, author = "Ping Zhang and Zhifeng Bao and Yuchen Li and Guoliang Li and Yipeng Zhang and Zhiyong Peng", title = "Towards an Optimal Outdoor Advertising Placement: When a Budget Constraint Meets Moving Trajectories", journal = j-TKDD, volume = "14", number = "5", pages = "51:1--51:32", month = aug, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3350488", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Aug 28 11:59:01 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3350488", abstract = "In this article, we propose and study the problem of trajectory-driven influential billboard placement: given a set of billboards U (each with a location and a cost), a database of trajectories T, and a budget L, we find a set of billboards within the \ldots{}", 
acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "51", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Xiao:2020:MUM, author = "Keli Xiao and Zeyang Ye and Lihao Zhang and Wenjun Zhou and Yong Ge and Yuefan Deng", title = "Multi-User Mobile Sequential Recommendation for Route Optimization", journal = j-TKDD, volume = "14", number = "5", pages = "52:1--52:28", month = aug, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3360048", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Aug 28 11:59:01 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3360048", abstract = "We enhance the mobile sequential recommendation (MSR) model and address some critical issues in existing formulations by proposing three new forms of the MSR from a multi-user perspective. The multi-user MSR (MMSR) model searches optimal routes for \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "52", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Huai:2020:LDM, author = "Mengdi Huai and Chenglin Miao and Yaliang Li and Qiuling Suo and Lu Su and Aidong Zhang", title = "Learning Distance Metrics from Probabilistic Information", journal = j-TKDD, volume = "14", number = "5", pages = "53:1--53:33", month = aug, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3364320", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Aug 28 11:59:01 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3364320", abstract = "The goal of metric learning is to learn a good distance metric that can capture the relationships among instances, and its importance has long been recognized in many fields. An implicit assumption in the traditional settings of metric learning is that \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "53", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhu:2020:PMG, author = "Hongyuan Zhu and Qi Liu and Nicholas Jing Yuan and Kun Zhang and Guang Zhou and Enhong Chen", title = "Pop Music Generation: From Melody to Multi-style Arrangement", journal = j-TKDD, volume = "14", number = "5", pages = "54:1--54:31", month = aug, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3374915", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Aug 28 11:59:01 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3374915", abstract = "Music plays an important role in our daily life. With the development of deep learning and modern generation techniques, researchers have done plenty of works on automatic music generation. 
However, due to the special requirements of both melody and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "54", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Mautz:2020:NRS, author = "Dominik Mautz and Wei Ye and Claudia Plant and Christian B{\"o}hm", title = "Non-Redundant Subspace Clusterings with {Nr-Kmeans} and {Nr-DipMeans}", journal = j-TKDD, volume = "14", number = "5", pages = "55:1--55:24", month = aug, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3385652", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Aug 28 11:59:01 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3385652", abstract = "A huge object collection in high-dimensional space can often be clustered in more than one way, for instance, objects could be clustered by their shape or alternatively by their color. Each grouping represents a different view of the dataset. The new \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "55", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Riondato:2020:MMI, author = "Matteo Riondato and Fabio Vandin", title = "{MiSoSouP}: Mining Interesting Subgroups with Sampling and Pseudodimension", journal = j-TKDD, volume = "14", number = "5", pages = "56:1--56:31", month = aug, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3385653", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Aug 28 11:59:01 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3385653", abstract = "We present MiSoSouP, a suite of algorithms for extracting high-quality approximations of the most interesting subgroups, according to different popular interestingness measures, from a random sample of a transactional dataset. We describe a new \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "56", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zugner:2020:AAG, author = "Daniel Z{\"u}gner and Oliver Borchert and Amir Akbarnejad and Stephan G{\"u}nnemann", title = "Adversarial Attacks on Graph Neural Networks: Perturbations and their Patterns", journal = j-TKDD, volume = "14", number = "5", pages = "57:1--57:31", month = aug, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3394520", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Aug 28 11:59:01 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3394520", abstract = "Deep learning models for graphs have achieved strong performance for the task of node classification. Despite their proliferation, little is known about their robustness to adversarial attacks. 
Yet, in domains where they are likely to be used, e.g., the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "57", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhou:2020:EAK, author = "Xu Zhou and Kenli Li and Zhibang Yang and Yunjun Gao and Keqin Li", title = "Efficient Approaches to $k$ Representative {G-Skyline} Queries", journal = j-TKDD, volume = "14", number = "5", pages = "58:1--58:27", month = aug, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3397503", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Aug 28 11:59:01 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3397503", abstract = "The G-Skyline (GSky) query is a powerful tool to analyze optimal groups in decision support. Compared with other group skyline queries, it releases users from providing an aggregate function. Besides, it can get much comprehensive results without \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "58", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhao:2020:UFS, author = "Peilin Zhao and Dayong Wang and Pengcheng Wu and Steven C. H. Hoi", title = "A Unified Framework for Sparse Online Learning", journal = j-TKDD, volume = "14", number = "5", pages = "59:1--59:20", month = aug, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3361559", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Aug 28 11:59:01 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3361559", abstract = "The amount of data in our society has been exploding in the era of big data. 
This article aims to address several open challenges in big data stream classification. Many existing studies in data mining literature follow the batch learning setting, which \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "59", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ceccarello:2020:GCB, author = "Matteo Ceccarello and Andrea Pietracaprina and Geppino Pucci", title = "A General Coreset-Based Approach to Diversity Maximization under Matroid Constraints", journal = j-TKDD, volume = "14", number = "5", pages = "60:1--60:27", month = aug, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3402448", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Aug 28 11:59:01 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3402448", abstract = "Diversity maximization is a fundamental problem in web search and data mining. For a given dataset $S$ of $n$ elements, the problem requires to determine a subset of $S$ containing $ k \ll n$ ``representatives'' which maximize some diversity function expressed in \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "60", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Nguyen:2020:EEC, author = "Hung Nguyen and Xuejian Wang and Leman Akoglu", title = "End-to-End Continual Rare-Class Recognition with Emerging Novel Subclasses", journal = j-TKDD, volume = "14", number = "5", pages = "61:1--61:28", month = aug, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3399660", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Aug 28 11:59:01 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3399660", abstract = "Given a labeled dataset that contains a rare (or minority) class containing of-interest instances, as well as a large class of instances that are not of interest, how can we learn to recognize future of-interest instances over a continuous stream? The \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "61", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2020:EMO, author = "Tingting Wang and Lei Duan and Guozhu Dong and Zhifeng Bao", title = "Efficient Mining of Outlying Sequence Patterns for Analyzing Outlierness of Sequence Data", journal = j-TKDD, volume = "14", number = "5", pages = "62:1--62:26", month = aug, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3399671", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Aug 28 11:59:01 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3399671", abstract = "Recently, a lot of research work has been proposed in different domains to detect outliers and analyze the outlierness of outliers for relational data. 
However, while sequence data is ubiquitous in real life, analyzing the outlierness for sequence data \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "62", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Rossi:2020:PSR, author = "Ryan A. Rossi and Di Jin and Sungchul Kim and Nesreen K. Ahmed and Danai Koutra and John Boaz Lee", title = "On Proximity and Structural Role-based Embeddings in Networks: Misconceptions, Techniques, and Applications", journal = j-TKDD, volume = "14", number = "5", pages = "63:1--63:37", month = aug, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3397191", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Aug 28 11:59:01 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3397191", abstract = "Structural roles define sets of structurally similar nodes that are more similar to nodes inside the set than outside, whereas communities define sets of nodes with more connections inside the set than outside. Roles based on structural similarity and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "63", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Nikolakopoulos:2020:BIB, author = "Athanasios N. 
Nikolakopoulos and George Karypis", title = "Boosting Item-based Collaborative Filtering via Nearly Uncoupled Random Walks", journal = j-TKDD, volume = "14", number = "6", pages = "64:1--64:26", month = oct, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3406241", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 8 06:52:44 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3406241", abstract = "Item-based models are among the most popular collaborative filtering approaches for building recommender systems. Random walks can provide a powerful tool for harvesting the rich network of interactions captured within these models. They can exploit \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "64", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Xu:2020:NAB, author = "Jiarong Xu and Yifan Luo and Jianrong Tao and Changjie Fan and Zhou Zhao and Jiangang Lu", title = "{NGUARD+}: an Attention-based Game Bot Detection Framework via Player Behavior Sequences", journal = j-TKDD, volume = "14", number = "6", pages = "65:1--65:24", month = oct, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3399711", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 8 06:52:44 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3399711", abstract = "Game bots are automated programs that assist cheating users, leading to an imbalance in the game ecosystem and the collapse of user interest. Online games provide immersive gaming experience and attract many loyal fans. However, game bots have \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "65", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Guo:2020:IMS, author = "Jianxiong Guo and Weili Wu", title = "Influence Maximization: Seeding Based on Community Structure", journal = j-TKDD, volume = "14", number = "6", pages = "66:1--66:22", month = oct, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3399661", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 8 06:52:44 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3399661", abstract = "Influence maximization problem attempts to find a small subset of nodes in a social network that makes the expected influence maximized, which has been researched intensively before. Most of the existing literature focus only on maximizing total \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "66", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Hu:2020:EUP, author = "Renjun Hu and Yanchi Liu and Yanyan Li and Jingbo Zhou and Shuai Ma and Hui Xiong", title = "Exploiting User Preference and Mobile Peer Influence for Human Mobility Annotation", journal = j-TKDD, volume = "14", number = "6", pages = "67:1--67:18", month = oct, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3406600", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 8 06:52:44 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3406600", abstract = "Human mobility annotation aims to assign mobility records the corresponding visiting Point-of-Interests (POIs). It is one of the most fundamental problems for understanding human mobile behaviors. 
In literature, many efforts have been devoted to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "67", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Pang:2020:HUO, author = "Guansong Pang and Longbing Cao", title = "Heterogeneous Univariate Outlier Ensembles in Multidimensional Data", journal = j-TKDD, volume = "14", number = "6", pages = "68:1--68:27", month = oct, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3403934", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 8 06:52:44 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3403934", abstract = "In outlier detection, recent major research has shifted from developing univariate methods to multivariate methods due to the rapid growth of multidimensional data. However, one typical issue of this paradigm shift is that many multidimensional data \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "68", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zamzami:2020:PMF, author = "Nuha Zamzami and Nizar Bouguila", title = "Probabilistic Modeling for Frequency Vectors Using a Flexible Shifted-Scaled {Dirichlet} Distribution Prior", journal = j-TKDD, volume = "14", number = "6", pages = "69:1--69:35", month = oct, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3406242", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 8 06:52:44 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3406242", abstract = "Burstiness and overdispersion phenomena of count vectors pose significant challenges in modeling such data accurately. 
While the dependency assumption of the multinomial distribution causes its failure to model frequency vectors in several machine \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "69", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Paudel:2020:ACD, author = "Ramesh Paudel and William Eberle", title = "An Approach For Concept Drift Detection in a Graph Stream Using Discriminative Subgraphs", journal = j-TKDD, volume = "14", number = "6", pages = "70:1--70:25", month = oct, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3406243", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 8 06:52:44 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3406243", abstract = "The emergence of mining complex networks like social media, sensor networks, and the world-wide-web has attracted considerable research interest. In a streaming scenario, the concept to be learned can change over time. However, while there has been some \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "70", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Mohotti:2020:EOD, author = "Wathsala Anupama Mohotti and Richi Nayak", title = "Efficient Outlier Detection in Text Corpus Using Rare Frequency and Ranking", journal = j-TKDD, volume = "14", number = "6", pages = "71:1--71:30", month = oct, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3399712", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 8 06:52:44 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3399712", abstract = "Outlier detection in text data collections has become significant due to the need of finding anomalies in the myriad of text data sources. High feature dimensionality, together with the larger size of these document collections, presents a need for \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "71", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2020:TWS, author = "Chen Zhang and Steven C. H. Hoi and Fugee Tsung", title = "Time-Warped Sparse Non-negative Factorization for Functional Data Analysis", journal = j-TKDD, volume = "14", number = "6", pages = "72:1--72:23", month = oct, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3408313", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 8 06:52:44 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3408313", abstract = "This article proposes a novel time-warped sparse non-negative factorization method for functional data analysis. 
The proposed method on the one hand guarantees the extracted basis functions and their coefficients to be positive and interpretable, and on \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "72", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Matheny:2020:SSS, author = "Michael Matheny and Dong Xie and Jeff M. Phillips", title = "Scalable Spatial Scan Statistics for Trajectories", journal = j-TKDD, volume = "14", number = "6", pages = "73:1--73:24", month = oct, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3394046", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 8 06:52:44 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3394046", abstract = "We define several new models for how to define anomalous regions among enormous sets of trajectories. These are based on spatial scan statistics, and identify a geometric region which captures a subset of trajectories which are significantly different \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "73", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2020:BDR, author = "Shuangyin Li and Yu Zhang and Rong Pan", title = "Bi-Directional Recurrent Attentional Topic Model", journal = j-TKDD, volume = "14", number = "6", pages = "74:1--74:30", month = oct, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3412371", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 8 06:52:44 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3412371", abstract = "In a document, the topic distribution of a sentence depends on both the topics of its neighbored sentences and its own content, and it is usually affected by the topics of the neighbored sentences with different weights. The neighbored sentences of a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "74", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Guo:2020:RAL, author = "Jipeng Guo and Yanfeng Sun and Junbin Gao and Yongli Hu and Baocai Yin", title = "Robust Adaptive Linear Discriminant Analysis with Bidirectional Reconstruction Constraint", journal = j-TKDD, volume = "14", number = "6", pages = "75:1--75:20", month = oct, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3409478", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 8 06:52:44 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3409478", abstract = "Linear discriminant analysis (LDA) is a well-known supervised method for dimensionality reduction in which the global structure of data can be preserved. 
The classical LDA is sensitive to the noises, and the projection direction of LDA cannot preserve \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "75", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Savva:2020:LSD, author = "Fotis Savva and Christos Anagnostopoulos and Peter Triantafillou and Kostas Kolomvatsos", title = "Large-scale Data Exploration Using Explanatory Regression Functions", journal = j-TKDD, volume = "14", number = "6", pages = "76:1--76:33", month = oct, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3410448", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 8 06:52:44 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3410448", abstract = "Analysts wishing to explore multivariate data spaces, typically issue queries involving selection operators, i.e., range or equality predicates, which define data subspaces of potential interest. Then, they use aggregation functions, the results of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "76", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ma:2020:RRT, author = "Qian Ma and Yu Gu and Wang-Chien Lee and Ge Yu and Hongbo Liu and Xindong Wu", title = "{REMIAN}: Real-Time and Error-Tolerant Missing Value Imputation", journal = j-TKDD, volume = "14", number = "6", pages = "77:1--77:38", month = oct, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3412364", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 8 06:52:44 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3412364", abstract = "Missing value (MV) imputation is a critical preprocessing means for data mining. Nevertheless, existing MV imputation methods are mostly designed for batch processing, and thus are not applicable to streaming data, especially those with poor quality. In \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "77", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2021:HPR, author = "Hao Wang and Shuai Ding and Yeqing Li and Xiaojian Li and Youtao Zhang", title = "Hierarchical Physician Recommendation via Diversity-enhanced Matrix Factorization", journal = j-TKDD, volume = "15", number = "1", pages = "1:1--1:17", month = jan, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3418227", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sun Mar 28 09:45:00 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3418227", abstract = "Recent studies have shown that there exhibits significantly imbalanced medical resource allocation across public hospitals. 
Patients, regardless of their diseases, tend to choose hospitals and physicians with a better reputation, which often overloads \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "1", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Galimberti:2021:SCD, author = "Edoardo Galimberti and Martino Ciaperoni and Alain Barrat and Francesco Bonchi and Ciro Cattuto and Francesco Gullo", title = "Span-core Decomposition for Temporal Networks: Algorithms and Applications", journal = j-TKDD, volume = "15", number = "1", pages = "2:1--2:44", month = jan, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3418226", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sun Mar 28 09:45:00 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3418226", abstract = "When analyzing temporal networks, a fundamental task is the identification of dense structures (i.e., groups of vertices that exhibit a large number of links), together with their temporal span (i.e., the period of time for which the high density holds). \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "2", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Huang:2021:DGM, author = "Yu Huang and Josh Jia-Ching Ying and Philip S. Yu and Vincent S. 
Tseng", title = "Dynamic Graph Mining for Multi-weight Multi-destination Route Planning with Deadlines Constraints", journal = j-TKDD, volume = "15", number = "1", pages = "3:1--3:32", month = jan, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3412363", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sun Mar 28 09:45:00 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3412363", abstract = "Route planning satisfied multiple requests is an emerging branch in the route planning field and has attracted significant attention from the research community in recent years. The prevailing studies focus only on seeking a route by minimizing a single \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "3", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Siers:2021:CIC, author = "Michael J. Siers and Md Zahidul Islam", title = "Class Imbalance and Cost-Sensitive Decision Trees: a Unified Survey Based on a Core Similarity", journal = j-TKDD, volume = "15", number = "1", pages = "4:1--4:31", month = jan, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3415156", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sun Mar 28 09:45:00 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3415156", abstract = "Class imbalance treatment methods and cost-sensitive classification algorithms are typically treated as two independent research areas. However, many of these techniques have properties in common. After providing a background to the two fields of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "4", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Huang:2021:MSN, author = "Hong Huang and Yu Song and Fanghua Ye and Xing Xie and Xuanhua Shi and Hai Jin", title = "Multi-Stage Network Embedding for Exploring Heterogeneous Edges", journal = j-TKDD, volume = "15", number = "1", pages = "5:1--5:27", month = jan, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3415157", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sun Mar 28 09:45:00 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3415157", abstract = "The relationships between objects in a network are typically diverse and complex, leading to the heterogeneous edges with different semantic information. In this article, we focus on exploring the heterogeneous edges for network representation learning. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "5", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Hu:2021:RTR, author = "Yue Hu and Daniel B. Work", title = "Robust Tensor Recovery with Fiber Outliers for Traffic Events", journal = j-TKDD, volume = "15", number = "1", pages = "6:1--6:27", month = jan, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3417337", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sun Mar 28 09:45:00 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3417337", abstract = "Event detection is gaining increasing attention in smart cities research. Large-scale mobility data serves as an important tool to uncover the dynamics of urban transportation systems, and more often than not the dataset is incomplete. 
In this article, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "6", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhu:2021:ARD, author = "Xiaoyan Zhu and Yingbin Li and Jiayin Wang and Tian Zheng and Jingwen Fu", title = "Automatic Recommendation of a Distance Measure for Clustering Algorithms", journal = j-TKDD, volume = "15", number = "1", pages = "7:1--7:22", month = jan, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3418228", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sun Mar 28 09:45:00 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3418228", abstract = "With a large number of distance measures, the appropriate choice for clustering a given data set with a specified clustering algorithm becomes an important problem. In this article, an automatic distance measure recommendation method for clustering \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "7", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Bernardini:2021:CAS, author = "Giulia Bernardini and Huiping Chen and Alessio Conte and Roberto Grossi and Grigorios Loukides and Nadia Pisanti and Solon P. 
Pissis and Giovanna Rosone and Michelle Sweering", title = "Combinatorial Algorithms for String Sanitization", journal = j-TKDD, volume = "15", number = "1", pages = "8:1--8:34", month = jan, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3418683", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sun Mar 28 09:45:00 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3418683", abstract = "String data are often disseminated to support applications such as location-based service provision or DNA sequence analysis. This dissemination, however, may expose sensitive patterns that model confidential knowledge (e.g., trips to mental health \ldots{}).", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "8", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Rossi:2021:HG, author = "Ryan A. Rossi and Nesreen K. Ahmed and Aldo Carranza and David Arbour and Anup Rao and Sungchul Kim and Eunyee Koh", title = "Heterogeneous Graphlets", journal = j-TKDD, volume = "15", number = "1", pages = "9:1--9:43", month = jan, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3418773", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sun Mar 28 09:45:00 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3418773", abstract = "In this article, we introduce a generalization of graphlets to heterogeneous networks called typed graphlets. Informally, typed graphlets are small typed induced subgraphs. Typed graphlets generalize graphlets to rich heterogeneous networks as they \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "9", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ji:2021:ALS, author = "Yugang Ji and Mingyang Yin and Hongxia Yang and Jingren Zhou and Vincent W. Zheng and Chuan Shi and Yuan Fang", title = "Accelerating Large-Scale Heterogeneous Interaction Graph Embedding Learning via Importance Sampling", journal = j-TKDD, volume = "15", number = "1", pages = "10:1--10:23", month = jan, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3418684", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sun Mar 28 09:45:00 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3418684", abstract = "In real-world problems, heterogeneous entities are often related to each other through multiple interactions, forming a Heterogeneous Interaction Graph (HIG). While modeling HIGs to deal with fundamental tasks, graph neural networks present an \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "10", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Xun:2021:MPI, author = "Guangxu Xun and Kishlay Jha and Aidong Zhang", title = "{MeSHProbeNet-P}: Improving Large-scale {MeSH} Indexing with Personalizable {MeSH} Probes", journal = j-TKDD, volume = "15", number = "1", pages = "11:1--11:14", month = jan, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3421713", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sun Mar 28 09:45:00 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3421713", abstract = "Indexing biomedical research articles with Medical Subject Headings (MeSH) can greatly facilitate biomedical research and information retrieval. 
Currently MeSH indexing is performed by human experts. To alleviate the time consumption and monetary cost \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "11", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Tu:2021:CCJ, author = "Jinzheng Tu and Guoxian Yu and Jun Wang and Carlotta Domeniconi and Maozu Guo and Xiangliang Zhang", title = "{CrowdWT}: Crowdsourcing via Joint Modeling of Workers and Tasks", journal = j-TKDD, volume = "15", number = "1", pages = "12:1--12:24", month = jan, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3421712", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sun Mar 28 09:45:00 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3421712", abstract = "Crowdsourcing is a relatively inexpensive and efficient mechanism to collect annotations of data from the open Internet. Crowdsourcing workers are paid for the provided annotations, but the task requester usually has a limited budget. It is desirable to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "12", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Azevedo:2021:RNT, author = "Ricardo {De Azevedo} and Gabriel Resende Machado and Ronaldo Ribeiro Goldschmidt and Ricardo Choren", title = "A Reduced Network Traffic Method for {IoT} Data Clustering", journal = j-TKDD, volume = "15", number = "1", pages = "13:1--13:23", month = jan, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3423139", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sun Mar 28 09:45:00 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3423139", abstract = "Internet of Things (IoT) systems usually involve interconnected, low processing capacity, and low memory sensor nodes (devices) that collect data in several sorts of applications that interconnect people and things. In this scenario, mining tasks, such \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "13", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Rossi:2021:KGE, author = "Andrea Rossi and Denilson Barbosa and Donatella Firmani and Antonio Matinata and Paolo Merialdo", title = "Knowledge Graph Embedding for Link Prediction: a Comparative Analysis", journal = j-TKDD, volume = "15", number = "2", pages = "14:1--14:49", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3424672", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sun Apr 11 08:38:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3424672", abstract = "Knowledge Graphs (KGs) have found many applications in industrial and in academic settings, which in turn, have motivated considerable research efforts towards large-scale information extraction from a variety of sources. Despite such efforts, it is \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "14", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Amornbunchornvej:2021:ILM, author = "Chainarong Amornbunchornvej and Navaporn Surasvadi and Anon Plangprasopchok and Suttipong Thajchayapong", title = "Identifying Linear Models in Multi-Resolution Population Data Using Minimum Description Length Principle to Predict Household Income", journal = j-TKDD, volume = "15", number = "2", pages = "15:1--15:30", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3424670", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sun Apr 11 08:38:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3424670", abstract = "One shirt size cannot fit everybody, while we cannot make a unique shirt that fits perfectly for everyone because of resource limitations. This analogy is true for policy making as well. Policy makers cannot make a single policy to solve all problems \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "15", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Feng:2021:RSP, author = "Yi Feng and Chuanyi Li and Jidong Ge and Bin Luo and Vincent Ng", title = "Recommending Statutes: a Portable Method Based on Neural Networks", journal = j-TKDD, volume = "15", number = "2", pages = "16:1--16:22", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3424671", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sun Apr 11 08:38:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3424671", abstract = "Legal judgment prediction, which aims at predicting judgment results such as penalty, charges, and statutes for cases, has attracted much attention recently. In this article, we focus on building a recommender system to predict the associated statutes \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "16", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2021:HNH, author = "Yashen Wang and Huanhuan Zhang", title = "{HARP}: a Novel Hierarchical Attention Model for Relation Prediction", journal = j-TKDD, volume = "15", number = "2", pages = "17:1--17:22", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3424673", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sun Apr 11 08:38:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3424673", abstract = "Recent years have witnessed great advancement of representation learning (RL)-based models for the knowledge graph relation prediction task. 
However, they generally rely on structure information embedded in the encyclopedic knowledge graph, while the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "17", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhou:2021:HOS, author = "Dawei Zhou and Si Zhang and Mehmet Yigit Yildirim and Scott Alcorn and Hanghang Tong and Hasan Davulcu and Jingrui He", title = "High-Order Structure Exploration on Massive Graphs: a Local Graph Clustering Perspective", journal = j-TKDD, volume = "15", number = "2", pages = "18:1--18:26", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3425637", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sun Apr 11 08:38:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3425637", abstract = "Modeling and exploring high-order connectivity patterns, also called network motifs, are essential for understanding the fundamental structures that control and mediate the behavior of many complex systems. For example, in social networks, triangles \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "18", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2021:EFM, author = "Chongshou Li and Brenda Cheang and Zhixing Luo and Andrew Lim", title = "An Exponential Factorization Machine with Percentage Error Minimization to Retail Sales Forecasting", journal = j-TKDD, volume = "15", number = "2", pages = "19:1--19:32", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3426238", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sun Apr 11 08:38:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3426238", abstract = "This article proposes a new approach to sales forecasting for new products (stock-keeping units [SKUs]) with long lead time but short product life cycle. These SKUs are usually sold for one season only, without any replenishments. An exponential \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "19", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Djenouri:2021:TOD, author = "Youcef Djenouri and Djamel Djenouri and Jerry Chun-Wei Lin", title = "Trajectory Outlier Detection: New Problems and Solutions for Smart Cities", journal = j-TKDD, volume = "15", number = "2", pages = "20:1--20:28", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3425867", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sun Apr 11 08:38:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3425867", abstract = "This article introduces two new problems related to trajectory outlier detection: (1) group trajectory outlier (GTO) detection and (2) deviation point detection for both individual and group of trajectory outliers. Five algorithms are proposed for the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "20", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2021:SSR, author = "Kafeng Wang and Haoyi Xiong and Jiang Bian and Zhanxing Zhu and Qian Gao and Zhishan Guo and Cheng-Zhong Xu and Jun Huan and Dejing Dou", title = "Sampling Sparse Representations with Randomized Measurement {Langevin} Dynamics", journal = j-TKDD, volume = "15", number = "2", pages = "21:1--21:21", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3427585", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sun Apr 11 08:38:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3427585", abstract = "Stochastic Gradient Langevin Dynamics (SGLD) have been widely used for Bayesian sampling from certain probability distributions, incorporating derivatives of the log-posterior. With the derivative evaluation of the log-posterior distribution, SGLD \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "21", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Belohlavek:2021:ATP, author = "Radim Belohlavek and Martin Trnecka", title = "The {8M} Algorithm from Today's Perspective", journal = j-TKDD, volume = "15", number = "2", pages = "22:1--22:22", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3428078", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sun Apr 11 08:38:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3428078", abstract = "We provide a detailed analysis and a first complete description of 8M --- an old but virtually unknown algorithm for Boolean matrix factorization.
Even though the algorithm uses a rather limited insight into the factorization problem from today's \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "22", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Xu:2021:CIN, author = "En Xu and Zhiwen Yu and Bin Guo and Helei Cui", title = "Core Interest Network for Click-Through Rate Prediction", journal = j-TKDD, volume = "15", number = "2", pages = "23:1--23:16", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3428079", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sun Apr 11 08:38:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3428079", abstract = "In modern online advertising systems, the click-through rate (CTR) is an important index to measure the popularity of an item. It refers to the ratio of users who click on a specific advertisement to the number of total users who view it. Predicting the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "23", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ghosh:2021:CBE, author = "Aindrila Ghosh and Mona Nashaat and James Miller and Shaikh Quader", title = "Context-Based Evaluation of Dimensionality Reduction Algorithms --- Experiments and Statistical Significance Analysis", journal = j-TKDD, volume = "15", number = "2", pages = "24:1--24:40", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3428077", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sun Apr 11 08:38:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3428077", abstract = "Dimensionality reduction is a commonly used technique in data analytics. Reducing the dimensionality of datasets helps not only with managing their analytical complexity but also with removing redundancy. Over the years, several such algorithms have \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "24", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2021:HNA, author = "Shikang Liu and Fatemeh Vahedian and David Hachen and Omar Lizardo and Christian Poellabauer and Aaron Striegel and Tijana Milenkovi{\'c}", title = "Heterogeneous Network Approach to Predict Individuals' Mental Health", journal = j-TKDD, volume = "15", number = "2", pages = "25:1--25:26", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3429446", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sun Apr 11 08:38:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3429446", abstract = "Depression and anxiety are critical public health issues affecting millions of people around the world.
To identify individuals who are vulnerable to depression and anxiety, predictive models have been built that typically utilize data from one source. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "25", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhou:2021:UMF, author = "Zhengze Zhou and Giles Hooker", title = "Unbiased Measurement of Feature Importance in Tree-Based Methods", journal = j-TKDD, volume = "15", number = "2", pages = "26:1--26:21", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3429445", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sun Apr 11 08:38:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3429445", abstract = "We propose a modification that corrects for split-improvement variable importance measures in Random Forests and other tree-based methods. These methods have been shown to be biased towards increasing the importance of features with more potential \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "26", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Almeida:2021:MCB, author = "Matthew Almeida and Yong Zhuang and Wei Ding and Scott E. 
Crouter and Ping Chen", title = "Mitigating Class-Boundary Label Uncertainty to Reduce Both Model Bias and Variance", journal = j-TKDD, volume = "15", number = "2", pages = "27:1--27:18", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3429447", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sun Apr 11 08:38:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3429447", abstract = "The study of model bias and variance with respect to decision boundaries is critically important in supervised learning and artificial intelligence. There is generally a tradeoff between the two, as fine-tuning of the decision boundary of a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "27", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Munoz:2021:ISA, author = "Mario Andr{\'e}s Mu{\~n}oz and Tao Yan and Matheus R. Leal and Kate Smith-Miles and Ana Carolina Lorena and Gisele L. Pappa and R{\^o}mulo Madureira Rodrigues", title = "An Instance Space Analysis of Regression Problems", journal = j-TKDD, volume = "15", number = "2", pages = "28:1--28:25", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3436893", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sun Apr 11 08:38:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3436893", abstract = "The quest for greater insights into algorithm strengths and weaknesses, as revealed when studying algorithm performance on large collections of test problems, is supported by interactive visual analytics tools. A recent advance is Instance Space \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "28", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Coscia:2021:NCS, author = "Michele Coscia", title = "Noise Corrected Sampling of Online Social Networks", journal = j-TKDD, volume = "15", number = "2", pages = "29:1--29:21", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3434749", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sun Apr 11 08:38:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3434749", abstract = "In this article, we propose a new method to perform topological network sampling. Topological network sampling is a process for extracting a subset of nodes and edges from a network, such that analyses on the sample provide results and conclusions \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "29", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Steinbuss:2021:GAO, author = "Georg Steinbuss and Klemens B{\"o}hm", title = "Generating Artificial Outliers in the Absence of Genuine Ones --- A Survey", journal = j-TKDD, volume = "15", number = "2", pages = "30:1--30:37", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3447822", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sun Apr 11 08:38:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3447822", abstract = "By definition, outliers are rarely observed in reality, making them difficult to detect or analyze. Artificial outliers approximate such genuine outliers and can, for instance, help with the detection of genuine outliers or with benchmarking outlier- \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl.
Discov. Data", articleno = "30", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhu:2021:SCS, author = "Yi Zhu and Lei Li and Xindong Wu", title = "Stacked Convolutional Sparse Auto-Encoders for Representation Learning", journal = j-TKDD, volume = "15", number = "2", pages = "31:1--31:21", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3434767", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sun Apr 11 08:38:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3434767", abstract = "Deep learning seeks to achieve excellent performance for representation learning in image datasets. However, supervised deep learning models such as convolutional neural networks require a large number of labeled image data, which is intractable in \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "31", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Sun:2021:JTD, author = "Bin Sun and Dehui Kong and Shaofan Wang and Lichun Wang and Baocai Yin", title = "Joint Transferable Dictionary Learning and View Adaptation for Multi-view Human Action Recognition", journal = j-TKDD, volume = "15", number = "2", pages = "32:1--32:23", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3434746", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sun Apr 11 08:38:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3434746", abstract = "Multi-view human action recognition remains a challenging problem due to large view changes. 
In this article, we propose a transfer learning-based framework called transferable dictionary learning and view adaptation (TDVA) model for multi-view human \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "32", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Garciarena:2021:TAC, author = "Unai Garciarena and Alexander Mendiburu and Roberto Santana", title = "Towards Automatic Construction of Multi-Network Models for Heterogeneous Multi-Task Learning", journal = j-TKDD, volume = "15", number = "2", pages = "33:1--33:23", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3434748", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sun Apr 11 08:38:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3434748", abstract = "Multi-task learning, as it is understood nowadays, consists of using one single model to carry out several similar tasks. From classifying hand-written characters of different alphabets to figuring out how to play several Atari games using reinforcement \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "33", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ying:2021:IKB, author = "Shi Ying and Bingming Wang and Lu Wang and Qingshan Li and Yishi Zhao and Jianga Shang and Hao Huang and Guoli Cheng and Zhe Yang and Jiangyi Geng", title = "An Improved {KNN}-Based Efficient Log Anomaly Detection Method with Automatically Labeled Samples", journal = j-TKDD, volume = "15", number = "3", pages = "34:1--34:22", month = may, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3441448", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed May 5 08:45:16 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3441448", abstract = "Logs that record system abnormal states (anomaly logs) can be regarded as outliers, and the k-Nearest Neighbor (kNN) algorithm has relatively high accuracy in outlier detection methods. Therefore, we use the kNN algorithm to detect anomalies in the log \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "34", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Salve:2021:PIU, author = "Andrea {De Salve} and Paolo Mori and Barbara Guidi and Laura Ricci and Roberto {Di Pietro}", title = "Predicting Influential Users in Online Social Network Groups", journal = j-TKDD, volume = "15", number = "3", pages = "35:1--35:50", month = may, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3441447", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed May 5 08:45:16 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3441447", abstract = "The widespread adoption of Online Social Networks (OSNs), the ever-increasing amount of information produced by their users, and the corresponding capacity to influence markets, politics, and society, have led both industrial and academic researchers to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "35", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Xie:2021:UPC, author = "Hong Xie and Mingze Zhong and Yongkun Li and John C. S. Lui", title = "Understanding Persuasion Cascades in Online Product Rating Systems: Modeling, Analysis, and Inference", journal = j-TKDD, volume = "15", number = "3", pages = "36:1--36:29", month = may, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3440887", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed May 5 08:45:16 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3440887", abstract = "Online product rating systems have become an indispensable component for numerous web services such as Amazon, eBay, Google Play Store, and TripAdvisor. 
One functionality of such systems is to uncover the product quality via product ratings (or reviews) \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "36", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2021:POP, author = "Zheng Zhang and Xiaofeng Zhu and Guangming Lu and Yudong Zhang", title = "Probability Ordinal-Preserving Semantic Hashing for Large-Scale Image Retrieval", journal = j-TKDD, volume = "15", number = "3", pages = "37:1--37:22", month = may, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3442204", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed May 5 08:45:16 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3442204", abstract = "Semantic hashing enables computation and memory-efficient image retrieval through learning similarity-preserving binary representations. Most existing hashing methods mainly focus on preserving the piecewise class information or pairwise correlations of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "37", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Shin:2021:CFA, author = "Kijung Shin and Euiwoong Lee and Jinoh Oh and Mohammad Hammoud and Christos Faloutsos", title = "{CoCoS}: Fast and Accurate Distributed Triangle Counting in Graph Streams", journal = j-TKDD, volume = "15", number = "3", pages = "38:1--38:30", month = may, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3441487", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed May 5 08:45:16 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3441487", abstract = "Given a graph stream, how can we estimate the number of triangles in it using multiple machines with limited storage? Specifically, how should edges be processed and sampled across the machines for rapid and accurate estimation? The count of triangles \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "38", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ata:2021:MVC, author = "Sezin Kircali Ata and Yuan Fang and Min Wu and Jiaqi Shi and Chee Keong Kwoh and Xiaoli Li", title = "Multi-View Collaborative Network Embedding", journal = j-TKDD, volume = "15", number = "3", pages = "39:1--39:18", month = may, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3441450", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed May 5 08:45:16 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3441450", abstract = "Real-world networks often exist with multiple views, where each view describes one type of interaction among a common set of nodes. 
For example, on a video-sharing network, while two user nodes are linked, if they have common favorite videos in one view, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "39", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2021:SVR, author = "Wei Wang and Feng Xia and Jian Wu and Zhiguo Gong and Hanghang Tong and Brian D. Davison", title = "{Scholar2vec}: Vector Representation of Scholars for Lifetime Collaborator Prediction", journal = j-TKDD, volume = "15", number = "3", pages = "40:1--40:19", month = may, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3442199", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed May 5 08:45:16 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3442199", abstract = "While scientific collaboration is critical for a scholar, some collaborators can be more significant than others, e.g., lifetime collaborators. It has been shown that lifetime collaborators are more influential on a scholar's academic performance. However, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov.
Data", articleno = "40", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Bai:2021:TTG, author = "Luyi Bai and Xiangnan Ma and Mingcheng Zhang and Wenting Yu", title = "{TPmod}: a Tendency-Guided Prediction Model for Temporal Knowledge Graph Completion", journal = j-TKDD, volume = "15", number = "3", pages = "41:1--41:17", month = may, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3443687", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed May 5 08:45:16 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3443687", abstract = "Temporal knowledge graphs (TKGs) have become useful resources for numerous Artificial Intelligence applications, but they are far from completeness. Inferring missing events in temporal knowledge graphs is a fundamental and challenging task. However, most \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "41", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2021:RCE, author = "Jingjing Wang and Wenjun Jiang and Kenli Li and Keqin Li", title = "Reducing Cumulative Errors of Incremental {CP} Decomposition in Dynamic Online Social Networks", journal = j-TKDD, volume = "15", number = "3", pages = "42:1--42:33", month = may, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3441645", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed May 5 08:45:16 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3441645", abstract = "CANDECOMP/PARAFAC (CP) decomposition is widely used in various online social network (OSN) applications. However, it is inefficient when dealing with massive and incremental data. 
Some incremental CP decomposition (ICP) methods have been proposed to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "42", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wu:2021:PGA, author = "Guanhao Wu and Xiaofeng Gao and Ge Yan and Guihai Chen", title = "Parallel Greedy Algorithm to Multiple Influence Maximization in Social Network", journal = j-TKDD, volume = "15", number = "3", pages = "43:1--43:21", month = may, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3442341", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed May 5 08:45:16 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3442341", abstract = "Influence Maximization (IM) problem is to select influential users to maximize the influence spread, which plays an important role in many real-world applications such as product recommendation, epidemic control, and network monitoring. Nowadays multiple \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "43", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yang:2021:EDR, author = "Lei Yang and Xi Yu and Jiannong Cao and Xuxun Liu and Pan Zhou", title = "Exploring Deep Reinforcement Learning for Task Dispatching in Autonomous On-Demand Services", journal = j-TKDD, volume = "15", number = "3", pages = "44:1--44:23", month = may, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3442343", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed May 5 08:45:16 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3442343", abstract = "Autonomous on-demand services, such as GOGOX (formerly GoGoVan) in Hong Kong, provide a platform for users to request services and for suppliers to meet such demands. In such a platform, the suppliers have autonomy to accept or reject the demands to be \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "44", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Cheng:2021:GLG, author = "Lin Cheng and Yuliang Shi and Kun Zhang and Xinjun Wang and Zhiyong Chen", title = "{GGATB-LSTM}: Grouping and Global Attention-based Time-aware Bidirectional {LSTM} Medical Treatment Behavior Prediction", journal = j-TKDD, volume = "15", number = "3", pages = "45:1--45:16", month = may, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3441454", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed May 5 08:45:16 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3441454", abstract = "In China, with the continuous development of national health insurance policies, more and more people have joined the health insurance. 
How to accurately predict patients future medical treatment behavior becomes a hotspot issue. The biggest challenge in \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "45", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2021:SRS, author = "Xueyan Liu and Bo Yang and Hechang Chen and Katarzyna Musial and Hongxu Chen and Yang Li and Wanli Zuo", title = "A Scalable Redefined Stochastic Blockmodel", journal = j-TKDD, volume = "15", number = "3", pages = "46:1--46:28", month = may, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3442589", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed May 5 08:45:16 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3442589", abstract = "Stochastic blockmodel (SBM) is a widely used statistical network representation model, with good interpretability, expressiveness, generalization, and flexibility, which has become prevalent and important in the field of network science over the last \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "46", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2021:KTW, author = "Yan Liu and Bin Guo and Daqing Zhang and Djamal Zeghlache and Jingmin Chen and Ke Hu and Sizhe Zhang and Dan Zhou and Zhiwen Yu", title = "Knowledge Transfer with Weighted Adversarial Network for Cold-Start Store Site Recommendation", journal = j-TKDD, volume = "15", number = "3", pages = "47:1--47:27", month = may, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3442203", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed May 5 08:45:16 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3442203", abstract = "Store site recommendation aims to predict the value of the store at candidate locations and then recommend the optimal location to the company for placing a new brick-and-mortar store. Most existing studies focus on learning machine learning or deep \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "47", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Nasir:2021:TAM, author = "Muhammad Anis Uddin Nasir and Cigdem Aslay and Gianmarco {De Francisci Morales} and Matteo Riondato", title = "{TipTap}: Approximate Mining of Frequent $k$-Subgraph Patterns in Evolving Graphs", journal = j-TKDD, volume = "15", number = "3", pages = "48:1--48:35", month = may, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3442590", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed May 5 08:45:16 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3442590", abstract = "``Perhaps he could dance first and think afterwards, if it isn't too much to ask him.'' S. 
Beckett, Waiting for Godot. Given a labeled graph, the collection of $k$-vertex induced connected subgraph patterns that appear in the graph more frequently than a user- \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "48", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Lin:2021:PIR, author = "Chen Lin and Zhichao Ouyang and Xiaoli Wang and Hui Li and Zhenhua Huang", title = "Preserve Integrity in Realtime Event Summarization", journal = j-TKDD, volume = "15", number = "3", pages = "49:1--49:29", month = may, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3442344", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed May 5 08:45:16 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3442344", abstract = "Online text streams such as Twitter are the major information source for users when they are looking for ongoing events. Realtime event summarization aims to generate and update coherent and concise summaries to describe the state of a given event. Due to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "49", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Jiang:2021:DLB, author = "Jie Jiang and Qiuqiang Kong and Mark D. Plumbley and Nigel Gilbert and Mark Hoogendoorn and Diederik M. 
Roijers", title = "Deep Learning-Based Energy Disaggregation and On\slash Off Detection of Household Appliances", journal = j-TKDD, volume = "15", number = "3", pages = "50:1--50:21", month = may, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3441300", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed May 5 08:45:16 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3441300", abstract = "Energy disaggregation, a.k.a. Non-Intrusive Load Monitoring, aims to separate the energy consumption of individual appliances from the readings of a mains power meter measuring the total energy consumption of, e.g., a whole house. Energy consumption of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "50", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2021:EHQ, author = "Haida Zhang and Zengfeng Huang and Xuemin Lin and Zhe Lin and Wenjie Zhang and Ying Zhang", title = "Efficient and High-Quality Seeded Graph Matching: Employing Higher-order Structural Information", journal = j-TKDD, volume = "15", number = "3", pages = "51:1--51:31", month = may, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3442340", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed May 5 08:45:16 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3442340", abstract = "Driven by many real applications, we study the problem of seeded graph matching. Given two graphs $G_1 = (V_1, E_1)$ and $G_2 = (V_2, E_2)$, and a small set $S$ of pre-matched node pairs $[u, v]$ where $u \in V_1$ and $v \in V_2$, the problem is to identify a matching between $V_1$ and $V_2$ growing from $S$, such that each pair in the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "51", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Barlaug:2021:NNE, author = "Nils Barlaug and Jon Atle Gulla", title = "Neural Networks for Entity Matching: a Survey", journal = j-TKDD, volume = "15", number = "3", pages = "52:1--52:37", month = may, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3442200", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed May 5 08:45:16 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3442200", abstract = "Entity matching is the problem of identifying which records refer to the same real-world entity. It has been actively researched for decades, and a variety of different approaches have been developed. Even today, it remains a challenging problem, and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "52", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chen:2021:FCM, author = "Chen Chen and Ruiyue Peng and Lei Ying and Hanghang Tong", title = "Fast Connectivity Minimization on Large-Scale Networks", journal = j-TKDD, volume = "15", number = "3", pages = "53:1--53:25", month = may, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3442342", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed May 5 08:45:16 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3442342", abstract = "The connectivity of networks has been widely studied in many high-impact applications, ranging from immunization, critical infrastructure analysis, social network mining, to bioinformatic system studies. Regardless of the end application domains, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Knowl. Discov. Data", articleno = "53", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2021:LBC, author = "Yunzhe Wang and George Baciu and Chenhui Li", title = "A Layout-Based Classification Method for Visualizing Time-Varying Graphs", journal = j-TKDD, volume = "15", number = "4", pages = "54:1--54:24", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3441301", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jun 19 06:16:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3441301", abstract = "Connectivity analysis between the components of large evolving systems can reveal significant patterns of interaction. The systems can be simulated by topological graph structures. However, such analysis becomes challenging on large and complex graphs. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "54", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ouyang:2021:MAC, author = "Yi Ouyang and Bin Guo and Xing Tang and Xiuqiang He and Jian Xiong and Zhiwen Yu", title = "Mobile App Cross-Domain Recommendation with Multi-Graph Neural Network", journal = j-TKDD, volume = "15", number = "4", pages = "55:1--55:21", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3442201", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jun 19 06:16:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3442201", abstract = "With the rapid development of mobile app ecosystem, mobile apps have grown greatly popular. The explosive growth of apps makes it difficult for users to find apps that meet their interests. 
Therefore, it is necessary to recommend user with a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "55", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Dornaika:2021:EET, author = "F. Dornaika", title = "Elastic Embedding through Graph Convolution-based Regression for Semi-supervised Classification", journal = j-TKDD, volume = "15", number = "4", pages = "56:1--56:11", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3441456", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jun 19 06:16:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3441456", abstract = "This article introduces a scheme for semi-supervised learning by estimating a flexible non-linear data representation that exploits Spectral Graph Convolutions structure. Structured data are exploited in order to determine non-linear and linear models. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "56", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2021:LTE, author = "Yanni Li and Bing Liu and Yongbo Yu and Hui Li and Jiacan Sun and Jiangtao Cui", title = "{3E-LDA}: Three Enhancements to Linear Discriminant Analysis", journal = j-TKDD, volume = "15", number = "4", pages = "57:1--57:20", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3442347", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jun 19 06:16:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3442347", abstract = "Linear discriminant analysis (LDA) is one of the important techniques for dimensionality reduction, machine learning, and pattern recognition. However, in many applications, applying the classical LDA often faces the following problems: (1) sensitivity \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "57", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zang:2021:JMS, author = "Tianzi Zang and Yanmin Zhu and Yanan Xu and Jiadi Yu", title = "Jointly Modeling Spatio-Temporal Dependencies and Daily Flow Correlations for Crowd Flow Prediction", journal = j-TKDD, volume = "15", number = "4", pages = "58:1--58:20", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3439346", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jun 19 06:16:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3439346", abstract = "Crowd flow prediction is a vital problem for an intelligent transportation system construction in a smart city. 
It plays a crucial role in traffic management and behavioral analysis, thus it has raised great attention from many researchers. However, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "58", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ahmed:2021:OST, author = "Nesreen K. Ahmed and Nick Duffield and Ryan A. Rossi", title = "Online Sampling of Temporal Networks", journal = j-TKDD, volume = "15", number = "4", pages = "59:1--59:27", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3442202", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jun 19 06:16:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3442202", abstract = "Temporal networks representing a stream of timestamped edges are seemingly ubiquitous in the real world. However, the massive size and continuous nature of these networks make them fundamentally challenging to analyze and leverage for descriptive and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "59", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhao:2021:SIF, author = "Huan Zhao and Quanming Yao and Yangqiu Song and James T. 
Kwok and Dik Lun Lee", title = "Side Information Fusion for Recommender Systems over Heterogeneous Information Network", journal = j-TKDD, volume = "15", number = "4", pages = "60:1--60:32", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3441446", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jun 19 06:16:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3441446", abstract = "Collaborative filtering (CF) has been one of the most important and popular recommendation methods, which aims at predicting users' preferences (ratings) based on their past behaviors. Recently, various types of side information beyond the explicit \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "60", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2021:SEB, author = "Daokun Zhang and Jie Yin and Xingquan Zhu and Chengqi Zhang", title = "Search Efficient Binary Network Embedding", journal = j-TKDD, volume = "15", number = "4", pages = "61:1--61:27", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3436892", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jun 19 06:16:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3436892", abstract = "Traditional network embedding primarily focuses on learning a continuous vector representation for each node, preserving network structure and/or node content information, such that off-the-shelf machine learning algorithms can be easily applied to the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "61", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Song:2021:NEH, author = "Guojie Song and Yun Wang and Lun Du and Yi Li and Junshan Wang", title = "Network Embedding on Hierarchical Community Structure Network", journal = j-TKDD, volume = "15", number = "4", pages = "62:1--62:23", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3434747", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jun 19 06:16:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3434747", abstract = "Network embedding is a method of learning a low-dimensional vector representation of network vertices under the condition of preserving different types of network properties. Previous studies mainly focus on preserving structural information of vertices \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "62", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yu:2021:UVC, author = "Kui Yu and Lin Liu and Jiuyong Li", title = "A Unified View of Causal and Non-causal Feature Selection", journal = j-TKDD, volume = "15", number = "4", pages = "63:1--63:46", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3436891", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jun 19 06:16:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3436891", abstract = "In this article, we aim to develop a unified view of causal and non-causal feature selection methods. The unified view will fill in the gap in the research of the relation between the two types of methods. 
Based on the Bayesian network framework and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "63", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yin:2021:RIR, author = "Shuai Yin and Yanfeng Sun and Junbin Gao and Yongli Hu and Boyue Wang and Baocai Yin", title = "Robust Image Representation via Low Rank Locality Preserving Projection", journal = j-TKDD, volume = "15", number = "4", pages = "64:1--64:22", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3434768", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jun 19 06:16:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3434768", abstract = "Locality preserving projection (LPP) is a dimensionality reduction algorithm preserving the neighborhood graph structure of data. However, the conventional LPP is sensitive to outliers existing in data. This article proposes a novel low-rank LPP model \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "64", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Steinbuss:2021:BUO, author = "Georg Steinbuss and Klemens B{\"o}hm", title = "Benchmarking Unsupervised Outlier Detection with Realistic Synthetic Data", journal = j-TKDD, volume = "15", number = "4", pages = "65:1--65:20", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3441453", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jun 19 06:16:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3441453", abstract = "Benchmarking unsupervised outlier detection is difficult. 
Outliers are rare, and existing benchmark data contains outliers with various and unknown characteristics. Fully synthetic data usually consists of outliers and regular instances with clear \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "65", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Lin:2021:SEK, author = "Mingkai Lin and Wenzhong Li and Lynda J. Song and Cam-Tu Nguyen and Xiaoliang Wang and Sanglu Lu", title = "{SAKE}: Estimating {Katz} Centrality Based on Sampling for Large-Scale Social Networks", journal = j-TKDD, volume = "15", number = "4", pages = "66:1--66:21", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3441646", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jun 19 06:16:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3441646", abstract = "Katz centrality is a fundamental concept to measure the influence of a vertex in a social network. However, existing approaches to calculating Katz centrality in a large-scale network are unpractical and computationally expensive. In this article, we \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "66", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Amornbunchornvej:2021:VLG, author = "Chainarong Amornbunchornvej and Elena Zheleva and Tanya Berger-Wolf", title = "Variable-lag {Granger} Causality and Transfer Entropy for Time Series Analysis", journal = j-TKDD, volume = "15", number = "4", pages = "67:1--67:30", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3441452", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jun 19 06:16:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3441452", abstract = "Granger causality is a fundamental technique for causal inference in time series data, commonly used in the social and biological sciences. Typical operationalizations of Granger causality make a strong assumption that every time point of the effect \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "67", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Xia:2021:ETD, author = "Peike Xia and Wenjun Jiang and Jie Wu and Surong Xiao and Guojun Wang", title = "Exploiting Temporal Dynamics in Product Reviews for Dynamic Sentiment Prediction at the Aspect Level", journal = j-TKDD, volume = "15", number = "4", pages = "68:1--68:29", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3441451", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jun 19 06:16:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3441451", abstract = "Online reviews and ratings play an important role in shaping the purchase decisions of customers in e-commerce. 
Many researches have been done to make proper recommendations for users, by exploiting reviews, ratings, user profiles, or behaviors. However, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "68", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Kumar:2021:AGN, author = "Suhansanu Kumar and Hari Sundaram", title = "Attribute-Guided Network Sampling Mechanisms", journal = j-TKDD, volume = "15", number = "4", pages = "69:1--69:24", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3441445", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jun 19 06:16:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3441445", abstract = "This article introduces a novel task-independent sampler for attributed networks. The problem is important because while data mining tasks on network content are common, sampling on internet-scale networks is costly. Link-trace samplers such as Snowball \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "69", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ghasemi:2021:UEE, author = "Negin Ghasemi and Ramin Fatourechi and Saeedeh Momtazi", title = "User Embedding for Expert Finding in Community Question Answering", journal = j-TKDD, volume = "15", number = "4", pages = "70:1--70:16", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3441302", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jun 19 06:16:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3441302", abstract = "The number of users who have the appropriate knowledge to answer asked questions in community question answering is lower than those who ask questions. Therefore, finding expert users who can answer the questions is very crucial and useful. In this \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "70", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yan:2021:SAB, author = "Ruidong Yan and Yi Li and Deying Li and Yongcai Wang and Yuqing Zhu and Weili Wu", title = "A Stochastic Algorithm Based on Reverse Sampling Technique to Fight Against the Cyberbullying", journal = j-TKDD, volume = "15", number = "4", pages = "71:1--71:22", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3441455", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jun 19 06:16:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3441455", abstract = "Cyberbullying has caused serious consequences especially for social network users in recent years. However, the challenge is how to fight against the cyberbullying effectively from the algorithmic perspective. 
In this article, we study the fighting \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "71", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2021:ANE, author = "Juan-Hui Li and Ling Huang and Chang-Dong Wang and Dong Huang and Jian-Huang Lai and Pei Chen", title = "Attributed Network Embedding with Micro-Meso Structure", journal = j-TKDD, volume = "15", number = "4", pages = "72:1--72:26", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3441486", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jun 19 06:16:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3441486", abstract = "Recently, network embedding has received a large amount of attention in network analysis. Although some network embedding methods have been developed from different perspectives, on one hand, most of the existing methods only focus on leveraging the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "72", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2021:CHI, author = "Benhui Zhang and Maoguo Gong and Jianbin Huang and Xiaoke Ma", title = "Clustering Heterogeneous Information Network by Joint Graph Embedding and Nonnegative Matrix Factorization", journal = j-TKDD, volume = "15", number = "4", pages = "73:1--73:25", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3441449", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jun 19 06:16:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3441449", abstract = "Many complex systems derived from nature and society consist of multiple types of entities and heterogeneous interactions, which can be effectively modeled as heterogeneous information network (HIN). Structural analysis of heterogeneous networks is of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "73", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yao:2021:SCI, author = "Liuyi Yao and Zhixuan Chu and Sheng Li and Yaliang Li and Jing Gao and Aidong Zhang", title = "A Survey on Causal Inference", journal = j-TKDD, volume = "15", number = "5", pages = "74:1--74:46", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3444944", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jun 29 08:31:04 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3444944", abstract = "Causal inference is a critical research topic across many domains, such as statistics, computer science, education, public policy, and economics, for decades. 
Nowadays, estimating causal effect from observational data has become an appealing research \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "74", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Jurdi:2021:CNN, author = "Wissam {Al Jurdi} and Jacques {Bou Abdo} and Jacques Demerjian and Abdallah Makhoul", title = "Critique on Natural Noise in Recommender Systems", journal = j-TKDD, volume = "15", number = "5", pages = "75:1--75:30", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3447780", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jun 29 08:31:04 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3447780", abstract = "Recommender systems have been upgraded, tested, and applied in many, often incomparable ways. In attempts to diligently understand user behavior in certain environments, those systems have been frequently utilized in domains like e-commerce, e-learning, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "75", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Duong:2021:DGF, author = "Quang-Huy Duong and Heri Ramampiaro and Kjetil N{\o}rv{\aa}g and Thu-Lan Dam", title = "Density Guarantee on Finding Multiple Subgraphs and Subtensors", journal = j-TKDD, volume = "15", number = "5", pages = "76:1--76:32", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3446668", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jun 29 08:31:04 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3446668", abstract = "Dense subregion (subgraph \& subtensor) detection is a well-studied area, with a wide range of applications, and numerous efficient approaches and algorithms have been proposed. Approximation approaches are commonly used for detecting dense subregions \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "76", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Burkhardt:2021:OAB, author = "Paul Burkhardt", title = "Optimal Algebraic Breadth-First Search for Sparse Graphs", journal = j-TKDD, volume = "15", number = "5", pages = "77:1--77:19", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3446216", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jun 29 08:31:04 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3446216", abstract = "There has been a rise in the popularity of algebraic methods for graph algorithms given the development of the GraphBLAS library and other sparse matrix methods. An exemplar for these approaches is Breadth-First Search (BFS). 
The algebraic BFS algorithm \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "77", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Maurya:2021:GNN, author = "Sunil Kumar Maurya and Xin Liu and Tsuyoshi Murata", title = "Graph Neural Networks for Fast Node Ranking Approximation", journal = j-TKDD, volume = "15", number = "5", pages = "78:1--78:32", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3446217", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jun 29 08:31:04 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3446217", abstract = "Graphs arise naturally in numerous situations, including social graphs, transportation graphs, web graphs, protein graphs, etc. One of the important problems in these settings is to identify which nodes are important in the graph and how they affect the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "78", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Stefani:2021:TSE, author = "Lorenzo {De Stefani} and Erisa Terolli and Eli Upfal", title = "Tiered Sampling: an Efficient Method for Counting Sparse Motifs in Massive Graph Streams", journal = j-TKDD, volume = "15", number = "5", pages = "79:1--79:52", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3441299", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jun 29 08:31:04 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3441299", abstract = "We introduce Tiered Sampling, a novel technique for estimating the count of sparse motifs in massive graphs whose edges are observed in a stream. Our technique requires only a single pass on the data and uses a memory of fixed size M, which can be \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "79", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Bauer:2021:ICL, author = "Josef Bauer and Dietmar Jannach", title = "Improved Customer Lifetime Value Prediction With Sequence-To-Sequence Learning and Feature-Based Models", journal = j-TKDD, volume = "15", number = "5", pages = "80:1--80:37", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3441444", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jun 29 08:31:04 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3441444", abstract = "The prediction of the Customer Lifetime Value (CLV) is an important asset for tool-supported marketing by customer relationship managers. 
Since standard methods based on purchase recency, frequency, and past profit and revenue statistics often have \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "80", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Sanei-Mehri:2021:MLM, author = "Seyed-Vahid Sanei-Mehri and Apurba Das and Hooman Hashemi and Srikanta Tirthapura", title = "Mining Largest Maximal Quasi-Cliques", journal = j-TKDD, volume = "15", number = "5", pages = "81:1--81:21", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3446637", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jun 29 08:31:04 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3446637", abstract = "Quasi-cliques are dense incomplete subgraphs of a graph that generalize the notion of cliques. Enumerating quasi-cliques from a graph is a robust way to detect densely connected structures with applications in bioinformatics and social network analysis. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "81", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Gan:2021:UMA, author = "Wensheng Gan and Jerry Chun-Wei Lin and Jiexiong Zhang and Hongzhi Yin and Philippe Fournier-Viger and Han-Chieh Chao and Philip S. 
Yu", title = "Utility Mining Across Multi-Dimensional Sequences", journal = j-TKDD, volume = "15", number = "5", pages = "82:1--82:24", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3446938", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jun 29 08:31:04 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3446938", abstract = "Knowledge extraction from database is the fundamental task in database and data mining community, which has been applied to a wide range of real-world applications and situations. Different from the support-based mining models, the utility-oriented \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "82", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Hao:2021:DEI, author = "Shaoyang Hao and Bin Guo and Hao Wang and Yunji Liang and Lina Yao and Qianru Wang and Zhiwen Yu", title = "{DeepDepict}: Enabling Information Rich, Personalized Product Description Generation With the Deep Multiple Pointer Generator Network", journal = j-TKDD, volume = "15", number = "5", pages = "83:1--83:16", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3446982", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jun 29 08:31:04 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3446982", abstract = "In e-commerce platforms, the online descriptive information of products shows significant impacts on the purchase behaviors. To attract potential buyers for product promotion, numerous workers are employed to write the impressive product descriptions. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "83", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Guo:2021:AIM, author = "Jianxiong Guo and Weili Wu", title = "Adaptive Influence Maximization: If Influential Node Unwilling to Be the Seed", journal = j-TKDD, volume = "15", number = "5", pages = "84:1--84:23", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3447396", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jun 29 08:31:04 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3447396", abstract = "Influence maximization problem attempts to find a small subset of nodes that makes the expected influence spread maximized, which has been researched intensively before. They all assumed that each user in the seed set we select is activated successfully \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "84", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Cheng:2021:DEB, author = "Weiyu Cheng and Yanyan Shen and Linpeng Huang and Yanmin Zhu", title = "Dual-Embedding based Deep Latent Factor Models for Recommendation", journal = j-TKDD, volume = "15", number = "5", pages = "85:1--85:24", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3447395", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jun 29 08:31:04 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3447395", abstract = "Among various recommendation methods, latent factor models are usually considered to be state-of-the-art techniques, which aim to learn user and item embeddings for predicting user-item preferences. 
When applying latent factor models to the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "85", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Sharma:2021:STL, author = "Shalini Sharma and Angshul Majumdar", title = "Sequential Transform Learning", journal = j-TKDD, volume = "15", number = "5", pages = "86:1--86:18", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3447394", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jun 29 08:31:04 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3447394", abstract = "This work proposes a new approach for dynamical modeling; we call it sequential transform learning. This is loosely based on the transform (analysis dictionary) learning formulation. This is the first work on this topic. Transform learning, was \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "86", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2021:SAS, author = "Kai Liu and Hongbo Liu and Tomas E. 
Ward and Hua Wang and Yu Yang and Bo Zhang and Xindong Wu", title = "Self-Adaptive Skeleton Approaches to Detect Self-Organized Coalitions From Brain Functional Networks Through Probabilistic Mixture Models", journal = j-TKDD, volume = "15", number = "5", pages = "87:1--87:26", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3447570", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jun 29 08:31:04 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3447570", abstract = "Detecting self-organized coalitions from functional networks is one of the most important ways to uncover functional mechanisms in the brain. Determining these raises well-known technical challenges in terms of scale imbalance, outliers and hard-. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "87", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ling:2021:DGM, author = "Xiang Ling and Lingfei Wu and Saizhuo Wang and Gaoning Pan and Tengfei Ma and Fangli Xu and Alex X. Liu and Chunming Wu and Shouling Ji", title = "Deep Graph Matching and Searching for Semantic Code Retrieval", journal = j-TKDD, volume = "15", number = "5", pages = "88:1--88:21", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3447571", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jun 29 08:31:04 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3447571", abstract = "Code retrieval is to find the code snippet from a large corpus of source code repositories that highly matches the query of natural language description. 
Recent work mainly uses natural language processing techniques to process both query texts (i.e., \ldots{})", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "88", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Peng:2021:SSE, author = "Hao Peng and Jianxin Li and Yangqiu Song and Renyu Yang and Rajiv Ranjan and Philip S. Yu and Lifang He", title = "Streaming Social Event Detection and Evolution Discovery in Heterogeneous Information Networks", journal = j-TKDD, volume = "15", number = "5", pages = "89:1--89:33", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3447585", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jun 29 08:31:04 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3447585", abstract = "Events are happening in real world and real time, which can be planned and organized for occasions, such as social gatherings, festival celebrations, influential meetings, or sports activities. Social media platforms generate a lot of real-time text \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "89", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yue:2021:EBC, author = "Lin Yue and Hao Shen and Sen Wang and Robert Boots and Guodong Long and Weitong Chen and Xiaowei Zhao", title = "Exploring {BCI} Control in Smart Environments: Intention Recognition Via {EEG} Representation Enhancement Learning", journal = j-TKDD, volume = "15", number = "5", pages = "90:1--90:20", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3450449", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jun 29 08:31:04 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3450449", abstract = "The brain-computer interface (BCI) control technology that utilizes motor imagery to perform the desired action instead of manual operation will be widely used in smart environments. However, most of the research lacks robust feature representation of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "90", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2021:ADK, author = "Huawen Liu and Enhui Li and Xinwang Liu and Kaile Su and Shichao Zhang", title = "Anomaly Detection With Kernel Preserving Embedding", journal = j-TKDD, volume = "15", number = "5", pages = "91:1--91:18", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3447684", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jun 29 08:31:04 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3447684", abstract = "Similarity representation plays a central role in increasingly popular anomaly detection techniques, which have been successfully applied in various realistic scenes. 
Until now, many low-rank representation techniques have been introduced to measure the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "91", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2021:MMG, author = "Bo Liu and Xi He and Mingdong Song and Jiangqiang Li and Guangzhi Qu and Jianlei Lang and Rentao Gu", title = "A Method for Mining {Granger} Causality Relationship on Atmospheric Visibility", journal = j-TKDD, volume = "15", number = "5", pages = "92:1--92:16", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3447681", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Jun 29 08:31:04 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3447681", abstract = "Atmospheric visibility is an indicator of atmospheric transparency and its range directly reflects the quality of the atmospheric environment. With the acceleration of industrialization and urbanization, the natural environment has suffered some \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "92", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Paul:2021:MOC, author = "Dipanjyoti Paul and Rahul Kumar and Sriparna Saha and Jimson Mathew", title = "Multi-objective Cuckoo Search-based Streaming Feature Selection for Multi-label Dataset", journal = j-TKDD, volume = "15", number = "6", pages = "93:1--93:24", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3447586", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Jul 21 07:02:35 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3447586", abstract = "The feature selection method is the process of selecting only relevant features by removing irrelevant or redundant features amongst the large number of features that are used to represent data. Nowadays, many application domains especially social media \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "93", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Coro:2021:LRS, author = "Federico Cor{\'o} and Gianlorenzo D'Angelo and Yllka Velaj", title = "Link Recommendation for Social Influence Maximization", journal = j-TKDD, volume = "15", number = "6", pages = "94:1--94:23", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3449023", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Jul 21 07:02:35 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3449023", abstract = "Social link recommendation systems, like ``People-you-may-know'' on Facebook, ``Who-to-follow'' on Twitter, and ``Suggested-Accounts'' on Instagram assist the users of a social network in establishing new connections with other users. While these systems are \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "94", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Gao:2021:TPO, author = "Xiaofeng Gao and Wenyi Xu and Mingding Liao and Guihai Chen", title = "Trust Prediction for Online Social Networks with Integrated Time-Aware Similarity", journal = j-TKDD, volume = "15", number = "6", pages = "95:1--95:30", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3447682", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Jul 21 07:02:35 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3447682", abstract = "Online social networks gain increasing popularity in recent years. In online social networks, trust prediction is significant for recommendations of high reputation users as well as in many other applications. 
In the literature, trust prediction problem \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "95", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Bressan:2021:FMC, author = "Marco Bressan and Stefano Leucci and Alessandro Panconesi", title = "Faster Motif Counting via Succinct Color Coding and Adaptive Sampling", journal = j-TKDD, volume = "15", number = "6", pages = "96:1--96:27", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3447397", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Jul 21 07:02:35 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3447397", abstract = "We address the problem of computing the distribution of induced connected subgraphs, aka graphlets or motifs, in large graphs. The current state-of-the-art algorithms estimate the motif counts via uniform sampling by leveraging the color coding technique \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "96", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zeng:2021:FRD, author = "Shaoning Zeng and Bob Zhang and Jianping Gou and Yong Xu and Wei Huang", title = "Fast and Robust Dictionary-based Classification for Image Data", journal = j-TKDD, volume = "15", number = "6", pages = "97:1--97:22", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3449360", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Jul 21 07:02:35 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3449360", abstract = "Dictionary-based classification has been promising in knowledge discovery from image data, due to its good performance and interpretable theoretical system. Dictionary learning effectively supports both small- and large-scale datasets, while its \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "97", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2021:SEN, author = "Chenglin Li and Carrie Lu Tong and Di Niu and Bei Jiang and Xiao Zuo and Lei Cheng and Jian Xiong and Jianming Yang", title = "Similarity Embedding Networks for Robust Human Activity Recognition", journal = j-TKDD, volume = "15", number = "6", pages = "98:1--98:17", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3448021", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Jul 21 07:02:35 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3448021", abstract = "Deep learning models for human activity recognition (HAR) based on sensor data have been heavily studied recently. 
However, the generalization ability of deep models on complex real-world HAR data is limited by the availability of high-quality labeled \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "98", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Koley:2021:DEE, author = "Paramita Koley and Avirup Saha and Sourangshu Bhattacharya and Niloy Ganguly and Abir De", title = "Demarcating Endogenous and Exogenous Opinion Dynamics: an Experimental Design Approach", journal = j-TKDD, volume = "15", number = "6", pages = "99:1--99:25", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3449361", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Jul 21 07:02:35 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3449361", abstract = "The networked opinion diffusion in online social networks is often governed by the two genres of opinions---endogenous opinions that are driven by the influence of social contacts among users, and exogenous opinions which are formed by external effects like \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "99", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Joaristi:2021:SGF, author = "Mikel Joaristi and Edoardo Serra", title = "{SIR-GN}: a Fast Structural Iterative Representation Learning Approach For Graph Nodes", journal = j-TKDD, volume = "15", number = "6", pages = "100:1--100:39", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3450315", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Jul 21 07:02:35 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3450315", abstract = "Graph representation learning methods have attracted an increasing amount of attention in recent years. These methods focus on learning a numerical representation of the nodes in a graph. Learning these representations is a powerful instrument for tasks \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "100", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wu:2021:LGN, author = "Man Wu and Shirui Pan and Lan Du and Xingquan Zhu", title = "Learning Graph Neural Networks with Positive and Unlabeled Nodes", journal = j-TKDD, volume = "15", number = "6", pages = "101:1--101:25", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3450316", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Jul 21 07:02:35 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3450316", abstract = "Graph neural networks (GNNs) are important tools for transductive learning tasks, such as node classification in graphs, due to their expressive power in capturing complex interdependency between nodes. 
To enable GNN learning, existing works typically \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "101", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2021:NNS, author = "Dongsheng Li and Haodong Liu and Chao Chen and Yingying Zhao and Stephen M. Chu and Bo Yang", title = "{NeuSE}: a Neural Snapshot Ensemble Method for Collaborative Filtering", journal = j-TKDD, volume = "15", number = "6", pages = "102:1--102:20", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3450526", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Jul 21 07:02:35 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3450526", abstract = "In collaborative filtering (CF) algorithms, the optimal models are usually learned by globally minimizing the empirical risks averaged over all the observed data. However, the global models are often obtained via a performance tradeoff among users/items, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "102", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Deng:2021:PUT, author = "Jinliang Deng and Xiusi Chen and Zipei Fan and Renhe Jiang and Xuan Song and Ivor W. 
Tsang", title = "The Pulse of Urban Transport: Exploring the Co-evolving Pattern for Spatio-temporal Forecasting", journal = j-TKDD, volume = "15", number = "6", pages = "103:1--103:25", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3450528", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Jul 21 07:02:35 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3450528", abstract = "Transportation demand forecasting is a topic of large practical value. However, the model that fits the demand of one transportation by only considering the historical data of its own could be vulnerable since random fluctuations could easily impact the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "103", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2021:HCD, author = "Yashen Wang and Huanhuan Zhang and Zhirun Liu and Qiang Zhou", title = "Hierarchical Concept-Driven Language Model", journal = j-TKDD, volume = "15", number = "6", pages = "104:1--104:22", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3451167", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Jul 21 07:02:35 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3451167", abstract = "For guiding natural language generation, many semantic-driven methods have been proposed. While clearly improving the performance of the end-to-end training task, these existing semantic-driven methods still have clear limitations: for example, (i) they \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "104", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Alarte:2021:PLM, author = "Juli{\'a}n Alarte and Josep Silva", title = "Page-Level Main Content Extraction From Heterogeneous {Webpages}", journal = j-TKDD, volume = "15", number = "6", pages = "105:1--105:105", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3451168", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Jul 21 07:02:35 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3451168", abstract = "The main content of a webpage is often surrounded by other boilerplate elements related to the template, such as menus, advertisements, copyright notices, and comments. For crawlers and indexers, isolating the main content from the template and other \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "105", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Nettasinghe:2021:MLE, author = "Buddhika Nettasinghe and Vikram Krishnamurthy", title = "Maximum Likelihood Estimation of Power-law Degree Distributions via Friendship Paradox-based Sampling", journal = j-TKDD, volume = "15", number = "6", pages = "106:1--106:28", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3451166", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Jul 21 07:02:35 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3451166", abstract = "This article considers the problem of estimating a power-law degree distribution of an undirected network using sampled data. 
Although power-law degree distributions are ubiquitous in nature, the widely used parametric methods for estimating them (e.g., \ldots{})", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "106", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Anaissi:2021:OTB, author = "Ali Anaissi and Basem Suleiman and Seid Miad Zandavi", title = "Online Tensor-Based Learning Model for Structural Damage Detection", journal = j-TKDD, volume = "15", number = "6", pages = "107:1--107:18", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3451217", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Jul 21 07:02:35 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3451217", abstract = "The online analysis of multi-way data stored in a tensor has become an essential tool for capturing the underlying structures and extracting the sensitive features that can be used to learn a predictive model. However, data distributions often evolve \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "107", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2021:MIM, author = "Rui Wang and Yongkun Li and Shuai Lin and Hong Xie and Yinlong Xu and John C. S. 
Lui", title = "On Modeling Influence Maximization in Social Activity Networks under General Settings", journal = j-TKDD, volume = "15", number = "6", pages = "108:1--108:28", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3451218", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Jul 21 07:02:35 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3451218", abstract = "Finding the set of most influential users in online social networks (OSNs) to trigger the largest influence cascade is meaningful, e.g., companies may leverage the ``word-of-mouth'' effect to trigger a large cascade of purchases by offering free samples/ \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "108", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chen:2021:ICD, author = "Zhe Chen and Aixin Sun and Xiaokui Xiao", title = "Incremental Community Detection on Large Complex Attributed Network", journal = j-TKDD, volume = "15", number = "6", pages = "109:1--109:20", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3451216", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Jul 21 07:02:35 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3451216", abstract = "Community detection on network data is a fundamental task, and has many applications in industry. Network data in industry can be very large, with incomplete and complex attributes, and more importantly, growing. This calls for a community detection \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "109", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Xia:2021:GDD, author = "Tong Xia and Junjie Lin and Yong Li and Jie Feng and Pan Hui and Funing Sun and Diansheng Guo and Depeng Jin", title = "{$3$DGCN}: {$3$-Dimensional} Dynamic Graph Convolutional Network for Citywide Crowd Flow Prediction", journal = j-TKDD, volume = "15", number = "6", pages = "110:1--110:21", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3451394", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Jul 21 07:02:35 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3451394", abstract = "Crowd flow prediction is an essential task benefiting a wide range of applications for the transportation system and public safety. However, it is a challenging problem due to the complex spatio-temporal dependence and the complicated impact of urban \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "110", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2021:FBN, author = "Kai Liu and Xiangyu Li and Zhihui Zhu and Lodewijk Brand and Hua Wang", title = "Factor-Bounded Nonnegative Matrix Factorization", journal = j-TKDD, volume = "15", number = "6", pages = "111:1--111:18", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3451395", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Jul 21 07:02:35 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3451395", abstract = "Nonnegative Matrix Factorization (NMF) is broadly used to determine class membership in a variety of clustering applications. 
From movie recommendations and image clustering to visual feature extractions, NMF has applications to solve a large number of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "111", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2021:ACA, author = "Huandong Wang and Yong Li and Mu Du and Zhenhui Li and Depeng Jin", title = "{App2Vec}: Context-Aware Application Usage Prediction", journal = j-TKDD, volume = "15", number = "6", pages = "112:1--112:21", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3451396", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Jul 21 07:02:35 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3451396", abstract = "Both app developers and service providers have strong motivations to understand when and where certain apps are used by users. However, it has been a challenging problem due to the highly skewed and noisy app usage data. Moreover, apps are regarded as \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "112", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2022:DLV, author = "Fenglin Liu and Xian Wu and Shen Ge and Xuancheng Ren and Wei Fan and Xu Sun and Yuexian Zou", title = "{DiMBERT}: Learning Vision-Language Grounded Representations with Disentangled Multimodal-Attention", journal = j-TKDD, volume = "16", number = "1", pages = "1:1--1:19", month = feb, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3447685", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Sep 14 07:09:39 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3447685", abstract = "Vision-and-language (V-L) tasks require the system to understand both vision content and natural language, thus learning fine-grained joint representations of vision and language (a.k.a. V-L representations) is of paramount importance. Recently, various \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "1", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Gao:2022:CDR, author = "Chen Gao and Yong Li and Fuli Feng and Xiangning Chen and Kai Zhao and Xiangnan He and Depeng Jin", title = "Cross-domain Recommendation with Bridge-Item Embeddings", journal = j-TKDD, volume = "16", number = "1", pages = "2:1--2:23", month = feb, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3447683", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Sep 14 07:09:39 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3447683", abstract = "Web systems that provide the same functionality usually share a certain amount of items. 
This makes it possible to combine data from different websites to improve recommendation quality, known as the cross-domain recommendation task. Despite many research \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "2", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Lin:2022:LVA, author = "Luyue Lin and Xin Zheng and Bo Liu and Wei Chen and Yanshan Xiao", title = "A Latent Variable Augmentation Method for Image Categorization with Insufficient Training Samples", journal = j-TKDD, volume = "16", number = "1", pages = "3:1--3:35", month = feb, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3451165", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Sep 14 07:09:39 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3451165", abstract = "Over the past few years, we have made great progress in image categorization based on convolutional neural networks (CNNs). These CNNs are always trained based on a large-scale image data set; however, people may only have limited training samples for \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "3", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Gao:2022:GBS, author = "Jianliang Gao and Xiaoting Ying and Cong Xu and Jianxin Wang and Shichao Zhang and Zhao Li", title = "Graph-Based Stock Recommendation by Time-Aware Relational Attention Network", journal = j-TKDD, volume = "16", number = "1", pages = "4:1--4:21", month = feb, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3451397", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Sep 14 07:09:39 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3451397", abstract = "The stock market investors aim at maximizing their investment returns. Stock recommendation task is to recommend stocks with higher return ratios for the investors. Most stock prediction methods study the historical sequence patterns to predict stock \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "4", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Lin:2022:MML, author = "Yaojin Lin and Qinghua Hu and Jinghua Liu and Xingquan Zhu and Xindong Wu", title = "{MULFE}: Multi-Label Learning via Label-Specific Feature Space Ensemble", journal = j-TKDD, volume = "16", number = "1", pages = "5:1--5:24", month = feb, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3451392", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Sep 14 07:09:39 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3451392", abstract = "In multi-label learning, label correlations commonly exist in the data. Such correlation not only provides useful information, but also imposes significant challenges for multi-label learning. 
Recently, label-specific feature embedding has been proposed \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "5", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Lin:2022:JPF, author = "Fandel Lin and Hsun-Ping Hsieh", title = "A Joint Passenger Flow Inference and Path Recommender System for Deploying New Routes and Stations of Mass Transit Transportation", journal = j-TKDD, volume = "16", number = "1", pages = "6:1--6:36", month = feb, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3451393", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Sep 14 07:09:39 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3451393", abstract = "In this work, a novel decision assistant system for urban transportation, called Route Scheme Assistant (RSA), is proposed to address two crucial issues that few former researches have focused on: route-based passenger flow (PF) inference and multivariant \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "6", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2022:BAM, author = "Huafeng Liu and Liping Jing and Jingxuan Wen and Pengyu Xu and Jian Yu and Michael K. 
Ng", title = "{Bayesian} Additive Matrix Approximation for Social Recommendation", journal = j-TKDD, volume = "16", number = "1", pages = "7:1--7:34", month = feb, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3451391", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Sep 14 07:09:39 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3451391", abstract = "Social relations between users have been proven to be a good type of auxiliary information to improve the recommendation performance. However, it is a challenging issue to sufficiently exploit the social relations and correctly determine the user \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "7", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Guo:2022:RCT, author = "Jinjin Guo and Longbing Cao and Zhiguo Gong", title = "Recurrent Coupled Topic Modeling over Sequential Documents", journal = j-TKDD, volume = "16", number = "1", pages = "8:1--8:32", month = feb, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3451530", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Sep 14 07:09:39 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3451530", abstract = "The abundant sequential documents such as online archival, social media, and news feeds are streamingly updated, where each chunk of documents is incorporated with smoothly evolving yet dependent topics. Such digital texts have attracted extensive \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "8", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Guo:2022:DLD, author = "Yunyan Guo and Jianzhong Li", title = "Distributed Latent {Dirichlet} Allocation on Streams", journal = j-TKDD, volume = "16", number = "1", pages = "9:1--9:20", month = feb, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3451528", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Sep 14 07:09:39 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3451528", abstract = "Latent Dirichlet Allocation (LDA) has been widely used for topic modeling, with applications spanning various areas such as natural language processing and information retrieval. While LDA on small and static datasets has been extensively studied, several \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "9", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Han:2022:EAI, author = "Juhee Han and Younghoon Lee", title = "Explainable Artificial Intelligence-Based Competitive Factor Identification", journal = j-TKDD, volume = "16", number = "1", pages = "10:1--10:11", month = feb, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3451529", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Sep 14 07:09:39 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3451529", abstract = "Competitor analysis is an essential component of corporate strategy, providing both offensive and defensive strategic contexts to identify opportunities and threats. The rapid development of social media has recently led to several methodologies and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. 
Discov. Data", articleno = "10", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Laishram:2022:MEA, author = "Ricky Laishram and Jeremy D. Wendt and Sucheta Soundarajan", title = "{MCS+}: an Efficient Algorithm for Crawling the Community Structure in Multiplex Networks", journal = j-TKDD, volume = "16", number = "1", pages = "11:1--11:32", month = feb, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3451527", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Sep 14 07:09:39 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3451527", abstract = "In this article, we consider the problem of crawling a multiplex network to identify the community structure of a layer-of-interest. A multiplex network is one where there are multiple types of relationships between the nodes. In many multiplex networks, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "11", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2022:GML, author = "Lichen Wang and Zhengming Ding and Yun Fu", title = "Generic Multi-label Annotation via Adaptive Graph and Marginalized Augmentation", journal = j-TKDD, volume = "16", number = "1", pages = "12:1--12:20", month = feb, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3451884", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Sep 14 07:09:39 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3451884", abstract = "Multi-label learning recovers multiple labels from a single instance. It is a more challenging task compared with single-label manner. 
Most multi-label learning approaches need large-scale well-labeled samples to achieve high accurate performance. However, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "12", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Moreo:2022:LTT, author = "Alejandro Moreo and Andrea Esuli and Fabrizio Sebastiani", title = "Lost in Transduction: Transductive Transfer Learning in Text Classification", journal = j-TKDD, volume = "16", number = "1", pages = "13:1--13:21", month = feb, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3453146", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Sep 14 07:09:39 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3453146", abstract = "Obtaining high-quality labelled data for training a classifier in a new application domain is often costly. Transfer Learning (a.k.a. ``Inductive Transfer'') tries to alleviate these costs by transferring, to the ``target'' domain of interest, knowledge \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "13", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2022:MTP, author = "Yangfan Li and Kenli Li and Cen Chen and Xu Zhou and Zeng Zeng and Keqin Li", title = "Modeling Temporal Patterns with Dilated Convolutions for Time-Series Forecasting", journal = j-TKDD, volume = "16", number = "1", pages = "14:1--14:22", month = feb, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3453724", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Sep 14 07:09:39 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3453724", abstract = "Time-series forecasting is an important problem across a wide range of domains. Designing accurate and prompt forecasting algorithms is a non-trivial task, as temporal data that arise in real applications often involve both non-linear dynamics and linear \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "14", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yang:2022:CCP, author = "Keyu Yang and Yunjun Gao and Lei Liang and Song Bian and Lu Chen and Baihua Zheng", title = "{CrowdTC}: Crowd-powered Learning for Text Classification", journal = j-TKDD, volume = "16", number = "1", pages = "15:1--15:23", month = feb, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3457216", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Sep 14 07:09:39 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3457216", abstract = "Text classification is a fundamental task in content analysis. Nowadays, deep learning has demonstrated promising performance in text classification compared with shallow models. 
However, almost all the existing models do not take advantage of the wisdom \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "15", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2022:JMH, author = "Haobing Liu and Yanmin Zhu and Tianzi Zang and Yanan Xu and Jiadi Yu and Feilong Tang", title = "Jointly Modeling Heterogeneous Student Behaviors and Interactions among Multiple Prediction Tasks", journal = j-TKDD, volume = "16", number = "1", pages = "16:1--16:24", month = feb, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3458023", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Sep 14 07:09:39 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3458023", abstract = "Prediction tasks about students have practical significance for both student and college. Making multiple predictions about students is an important part of a smart campus. For instance, predicting whether a student will fail to graduate can alert the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "16", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Lee:2022:MMS, author = "Wu Lee and Yuliang Shi and Hongfeng Sun and Lin Cheng and Kun Zhang and Xinjun Wang and Zhiyong Chen", title = "{MSIPA}: Multi-Scale Interval Pattern-Aware Network for {ICU} Transfer Prediction", journal = j-TKDD, volume = "16", number = "1", pages = "17:1--17:17", month = feb, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3458284", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Sep 14 07:09:39 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3458284", abstract = "Accurate prediction of patients' ICU transfer events is of great significance for improving ICU treatment efficiency. ICU transition prediction task based on Electronic Health Records (EHR) is a temporal mining task like many other health informatics \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "17", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2022:BSF, author = "Min-Ling Zhang and Jun-Peng Fang and Yi-Bo Wang", title = "{BiLabel}-Specific Features for Multi-Label Classification", journal = j-TKDD, volume = "16", number = "1", pages = "18:1--18:23", month = feb, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3458283", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Sep 14 07:09:39 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3458283", abstract = "In multi-label classification, the task is to induce predictive models which can assign a set of relevant labels for the unseen instance. 
The strategy of label-specific features has been widely employed in learning from multi-label examples, where the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "18", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2022:NMV, author = "Bo Liu and Haowen Zhong and Yanshan Xiao", title = "New Multi-View Classification Method with Uncertain Data", journal = j-TKDD, volume = "16", number = "1", pages = "19:1--19:23", month = feb, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3458282", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Sep 14 07:09:39 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3458282", abstract = "Multi-view classification aims at designing a multi-view learning strategy to train a classifier from multi-view data, which are easily collected in practice. Most of the existing works focus on multi-view classification by assuming the multi-view data \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "19", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Na:2022:USE, author = "Gyoung S. Na and Hyunju Chang", title = "Unsupervised Subspace Extraction via Deep Kernelized Clustering", journal = j-TKDD, volume = "16", number = "1", pages = "20:1--20:15", month = feb, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3459082", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Sep 14 07:09:39 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3459082", abstract = "Feature extraction has been widely studied to find informative latent features and reduce the dimensionality of data. 
In particular, due to the difficulty in obtaining labeled data, unsupervised feature extraction has received much attention in data \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "20", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Aggarwal:2022:CEC, author = "Charu C. Aggarwal", title = "Communication from the {Editor-in-Chief}: State of the {{\booktitle{ACM Transactions on Knowledge Discovery from Data}}}", journal = j-TKDD, volume = "16", number = "2", pages = "21e:1--21e:2", month = apr, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3463950", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Sep 14 07:09:40 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3463950", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "21e", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2022:SUM, author = "Chunkai Zhang and Zilin Du and Yuting Yang and Wensheng Gan and Philip S. Yu", title = "On-Shelf Utility Mining of Sequence Data", journal = j-TKDD, volume = "16", number = "2", pages = "21:1--21:31", month = apr, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3457570", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Sep 14 07:09:40 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3457570", abstract = "Utility mining has emerged as an important and interesting topic owing to its wide application and considerable popularity. 
However, conventional utility mining methods have a bias toward items that have longer on-shelf time as they have a greater chance \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "21", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Tran:2022:CDP, author = "Cong Tran and Won-Yong Shin and Andreas Spitz", title = "Community Detection in Partially Observable Social Networks", journal = j-TKDD, volume = "16", number = "2", pages = "22:1--22:24", month = apr, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3461339", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Sep 14 07:09:40 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3461339", abstract = "The discovery of community structures in social networks has gained significant attention since it is a fundamental problem in understanding the networks' topology and functions. However, most social network data are collected from partially observable \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "22", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2022:CDL, author = "Zhao Li and Junshuai Song and Zehong Hu and Zhen Wang and Jun Gao", title = "Constrained Dual-Level Bandit for Personalized Impression Regulation in Online Ranking Systems", journal = j-TKDD, volume = "16", number = "2", pages = "23:1--23:23", month = apr, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3461340", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Sep 14 07:09:40 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3461340", abstract = "Impression regulation plays an important role in various online ranking systems, e.g., e-commerce ranking systems always need to achieve local commercial demands on some pre-labeled target items like fresh item cultivation and fraudulent item \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "23", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ohare:2022:HVT, author = "Kevin O'hare and Anna Jurek-Loughrey and Cassio {De Campos}", title = "High-Value Token-Blocking: Efficient Blocking Method for Record Linkage", journal = j-TKDD, volume = "16", number = "2", pages = "24:1--24:17", month = apr, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3450527", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Sep 14 07:09:40 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3450527", abstract = "Data integration is an important component of Big Data analytics. One of the key challenges in data integration is record linkage, that is, matching records that represent the same real-world entity. 
Because of computational costs, methods referred to as \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "24", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ding:2022:ESU, author = "Ming Ding and Tianyu Wang and Xudong Wang", title = "Establishing Smartphone User Behavior Model Based on Energy Consumption Data", journal = j-TKDD, volume = "16", number = "2", pages = "25:1--25:40", month = apr, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3461459", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Sep 14 07:09:40 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3461459", abstract = "In smartphone data analysis, both energy consumption modeling and user behavior mining have been explored extensively, but the relationship between energy consumption and user behavior has been rarely studied. Such a relationship is explored over large-\ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "25", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Saude:2022:RRB, author = "Jo{\~a}o Sa{\'u}de and Guilherme Ramos and Ludovico Boratto and Carlos Caleiro", title = "A Robust Reputation-Based Group Ranking System and Its Resistance to Bribery", journal = j-TKDD, volume = "16", number = "2", pages = "26:1--26:35", month = apr, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3462210", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Sep 14 07:09:40 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3462210", abstract = "The spread of online reviews and opinions and its growing influence on people's behavior and decisions boosted the interest to extract meaningful information from this data deluge. Hence, crowdsourced ratings of products and services gained a critical \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "26", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wu:2022:EHG, author = "Hanlu Wu and Tengfei Ma and Lingfei Wu and Fangli Xu and Shouling Ji", title = "Exploiting Heterogeneous Graph Neural Networks with Latent Worker\slash Task Correlation Information for Label Aggregation in Crowdsourcing", journal = j-TKDD, volume = "16", number = "2", pages = "27:1--27:18", month = apr, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3460865", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Sep 14 07:09:40 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3460865", abstract = "Crowdsourcing has attracted much attention for its convenience to collect labels from non-expert workers instead of experts. 
However, due to the high level of noise from the non-experts, a label aggregation model that infers the true label from noisy \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "27", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2022:MNV, author = "Hui-Jia Li and Lin Wang and Zhan Bu and Jie Cao and Yong Shi", title = "Measuring the Network Vulnerability Based on {Markov} Criticality", journal = j-TKDD, volume = "16", number = "2", pages = "28:1--28:24", month = apr, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3464390", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Sep 14 07:09:40 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3464390", abstract = "Vulnerability assessment---a critical issue for networks---attempts to foresee unexpected destructive events or hostile attacks in the whole system. In this article, we consider a new Markov global connectivity metric---Kemeny constant, and take its derivative \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "28", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2022:SBA, author = "Guangtao Wang and Gao Cong and Ying Zhang and Zhen Hai and Jieping Ye", title = "A Synopsis Based Approach for Itemset Frequency Estimation over Massive Multi-Transaction Stream", journal = j-TKDD, volume = "16", number = "2", pages = "29:1--29:30", month = apr, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3465238", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Sep 14 07:09:40 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3465238", abstract = "The streams where multiple transactions are associated with the same key are prevalent in practice, e.g., a customer has multiple shopping records arriving at different time. Itemset frequency estimation on such streams is very challenging since sampling \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "29", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yin:2022:WFM, author = "Jianfei Yin and Ruili Wang and Yeqing Guo and Yizhe Bai and Shunda Ju and Weili Liu and Joshua Zhexue Huang", title = "Wealth Flow Model: Online Portfolio Selection Based on Learning Wealth Flow Matrices", journal = j-TKDD, volume = "16", number = "2", pages = "30:1--30:27", month = apr, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3464308", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Sep 14 07:09:40 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3464308", abstract = "This article proposes a deep learning solution to the online portfolio selection problem based on learning a latent structure directly from a price time series. It introduces a novel wealth flow matrix for representing a latent structure that has special \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "30", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Hidalgo:2022:DAD, author = "Juan I. G. Hidalgo and Silas G. T. C. Santos and Roberto S. M. Barros", title = "Dynamically Adjusting Diversity in Ensembles for the Classification of Data Streams with Concept Drift", journal = j-TKDD, volume = "16", number = "2", pages = "31:1--31:20", month = apr, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3466616", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Sep 14 07:09:40 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3466616", abstract = "A data stream can be defined as a system that continually generates a lot of data over time. 
Today, processing data streams requires new demands and challenging tasks in the data mining and machine learning areas. Concept Drift is a problem commonly \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "31", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Cantini:2022:LSH, author = "Riccardo Cantini and Fabrizio Marozzo and Giovanni Bruno and Paolo Trunfio", title = "Learning Sentence-to-Hashtags Semantic Mapping for Hashtag Recommendation on Microblogs", journal = j-TKDD, volume = "16", number = "2", pages = "32:1--32:26", month = apr, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3466876", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Sep 14 07:09:40 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3466876", abstract = "The growing use of microblogging platforms is generating a huge amount of posts that need effective methods to be classified and searched. In Twitter and other social media platforms, hashtags are exploited by users to facilitate the search, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "32", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Abebe:2022:ODO, author = "Rediet Abebe and T.-H. Hubert Chan and Jon Kleinberg and Zhibin Liang and David Parkes and Mauro Sozio and Charalampos E. 
Tsourakakis", title = "Opinion Dynamics Optimization by Varying Susceptibility to Persuasion via Non-Convex Local Search", journal = j-TKDD, volume = "16", number = "2", pages = "33:1--33:34", month = apr, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3466617", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Sep 14 07:09:40 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3466617", abstract = "A long line of work in social psychology has studied variations in people's susceptibility to persuasion---the extent to which they are willing to modify their opinions on a topic. This body of literature suggests an interesting perspective on theoretical \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "33", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yang:2022:SHS, author = "Yang Yang and Hongchen Wei and Zhen-Qiang Sun and Guang-Yu Li and Yuanchun Zhou and Hui Xiong and Jian Yang", title = "{S2OSC}: a Holistic Semi-Supervised Approach for Open Set Classification", journal = j-TKDD, volume = "16", number = "2", pages = "34:1--34:27", month = apr, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3468675", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Sep 14 07:09:40 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3468675", abstract = "Open set classification (OSC) tackles the problem of determining whether the data are in-class or out-of-class during inference, when only provided with a set of in-class examples at training time. Traditional OSC methods usually train discriminative or \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "34", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2022:EHI, author = "Yiding Zhang and Xiao Wang and Nian Liu and Chuan Shi", title = "Embedding Heterogeneous Information Network in Hyperbolic Spaces", journal = j-TKDD, volume = "16", number = "2", pages = "35:1--35:23", month = apr, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3468674", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Sep 14 07:09:40 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3468674", abstract = "Heterogeneous information network (HIN) embedding, aiming to project HIN into a low-dimensional space, has attracted considerable research attention. Most of the existing HIN embedding methods focus on preserving the inherent network structure and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "35", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2022:CCN, author = "Xueyuan Wang and Hongpo Zhang and Zongmin Wang and Yaqiong Qiao and Jiangtao Ma and Honghua Dai", title = "{Con\&Net}: a Cross-Network Anchor Link Discovery Method Based on Embedding Representation", journal = j-TKDD, volume = "16", number = "2", pages = "36:1--36:18", month = apr, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3469083", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Sep 14 07:09:40 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3469083", abstract = "Cross-network anchor link discovery is an important research problem and has many applications in heterogeneous social network. 
Existing schemes of cross-network anchor link discovery can provide reasonable link discovery results, but the quality of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "36", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2022:HVA, author = "Hangbin Zhang and Raymond K. Wong and Victor W. Chu", title = "Hybrid Variational Autoencoder for Recommender Systems", journal = j-TKDD, volume = "16", number = "2", pages = "37:1--37:37", month = apr, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3470659", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Sep 14 07:09:40 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3470659", abstract = "E-commerce platforms heavily rely on automatic personalized recommender systems, e.g., collaborative filtering models, to improve customer experience. Some hybrid models have been proposed recently to address the deficiency of existing models. However, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "37", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Oliveira:2022:ALS, author = "Lucas {Santos De Oliveira} and Pedro O. S. 
Vaz-De-Melo and Aline {Carneiro Viana}", title = "Assessing Large-Scale Power Relations among Locations from Mobility Data", journal = j-TKDD, volume = "16", number = "2", pages = "38:1--38:31", month = apr, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3470770", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Sep 14 07:09:40 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3470770", abstract = "The pervasiveness of smartphones has shaped our lives, social norms, and the structure that dictates human behavior. They now directly influence how individuals demand resources or interact with network services. From this scenario, identifying key \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "38", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2022:KKR, author = "Zhenyu Zhang and Lei Zhang and Dingqi Yang and Liu Yang", title = "{KRAN}: Knowledge Refining Attention Network for Recommendation", journal = j-TKDD, volume = "16", number = "2", pages = "39:1--39:20", month = apr, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3470783", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Sep 14 07:09:40 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3470783", abstract = "Recommender algorithms combining knowledge graph and graph convolutional network are becoming more and more popular recently. Specifically, attributes describing the items to be recommended are often used as additional information. These attributes along \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "39", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhao:2022:STE, author = "Liang Zhao and Yuyang Gao and Jieping Ye and Feng Chen and Yanfang Ye and Chang-Tien Lu and Naren Ramakrishnan", title = "Spatio-Temporal Event Forecasting Using Incremental Multi-Source Feature Learning", journal = j-TKDD, volume = "16", number = "2", pages = "40:1--40:28", month = apr, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3464976", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Sep 14 07:09:40 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3464976", abstract = "The forecasting of significant societal events such as civil unrest and economic crisis is an interesting and challenging problem which requires both timeliness, precision, and comprehensiveness. Significant societal events are influenced and indicated \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "40", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Lei:2022:ODR, author = "Shuo Lei and Xuchao Zhang and Liang Zhao and Arnold P. 
Boedihardjo and Chang-Tien Lu", title = "Online and Distributed Robust Regressions with Extremely Noisy Labels", journal = j-TKDD, volume = "16", number = "3", pages = "41:1--41:24", month = jun, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3473038", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Dec 10 11:04:18 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3473038", abstract = "In today's era of big data, robust least-squares regression becomes a more challenging problem when considering the extremely corrupted labels along with explosive growth of datasets. Traditional robust methods can handle the noise but suffer from several \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "41", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2022:KDA, author = "Xingjian Li and Haoyi Xiong and Zeyu Chen and Jun Huan and Ji Liu and Cheng-Zhong Xu and Dejing Dou", title = "Knowledge Distillation with Attention for Deep Transfer Learning of Convolutional Networks", journal = j-TKDD, volume = "16", number = "3", pages = "42:1--42:20", month = jun, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3473912", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Dec 10 11:04:18 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3473912", abstract = "Transfer learning through fine-tuning a pre-trained neural network with an extremely large dataset, such as ImageNet, can significantly improve and accelerate training while the accuracy is frequently bottlenecked by the limited dataset size of the new \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "42", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Nashaat:2022:SSE, author = "Mona Nashaat and Aindrila Ghosh and James Miller and Shaikh Quader", title = "Semi-Supervised Ensemble Learning for Dealing with Inaccurate and Incomplete Supervision", journal = j-TKDD, volume = "16", number = "3", pages = "43:1--43:33", month = jun, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3473910", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Dec 10 11:04:18 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3473910", abstract = "In real-world tasks, obtaining a large set of noise-free data can be prohibitively expensive. Therefore, recent research tries to enable machine learning to work with weakly supervised datasets, such as inaccurate or incomplete data. However, the previous \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "43", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Shao:2022:NEM, author = "Ping Shao and Yang Yang and Shengyao Xu and Chunping Wang", title = "Network Embedding via Motifs", journal = j-TKDD, volume = "16", number = "3", pages = "44:1--44:20", month = jun, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3473911", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Dec 10 11:04:18 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3473911", abstract = "Network embedding has emerged as an effective way to deal with downstream tasks, such as node classification [16, 31, 42]. 
Most existing methods leverage multi-similarities between nodes such as connectivity, which considers vertices that are closely \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "44", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Kuang:2022:BSS, author = "Kun Kuang and Hengtao Zhang and Runze Wu and Fei Wu and Yueting Zhuang and Aijun Zhang", title = "Balance-Subsampled Stable Prediction Across Unknown Test Data", journal = j-TKDD, volume = "16", number = "3", pages = "45:1--45:21", month = jun, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3477052", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Dec 10 11:04:18 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3477052", abstract = "In data mining and machine learning, it is commonly assumed that training and test data share the same population distribution. However, this assumption is often violated in practice because of the sample selection bias, which might induce the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "45", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chen:2022:DDG, author = "Ling Chen and Xing Tang and Weiqi Chen and Yuntao Qian and Yansheng Li and Yongjun Zhang", title = "{DACHA}: a Dual Graph Convolution Based Temporal Knowledge Graph Representation Learning Method Using Historical Relation", journal = j-TKDD, volume = "16", number = "3", pages = "46:1--46:18", month = jun, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3477051", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Dec 10 11:04:18 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3477051", abstract = "Temporal knowledge graph (TKG) representation learning embeds relations and entities into a continuous low-dimensional vector space by incorporating temporal information. Latest studies mainly aim at learning entity representations by modeling entity \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "46", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2022:CAS, author = "Huandong Wang and Yong Li and Junjie Lin and Hancheng Cao and Depeng Jin", title = "Context-Aware Semantic Annotation of Mobility Records", journal = j-TKDD, volume = "16", number = "3", pages = "47:1--47:20", month = jun, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3477048", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Dec 10 11:04:18 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3477048", abstract = "The wide adoption of mobile devices has provided us with a massive volume of human mobility records. 
However, a large portion of these records is unlabeled, i.e., only have GPS coordinates without semantic information (e.g., Point of Interest (POI)). To \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "47", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Shi:2022:CLC, author = "Tian Shi and Xuchao Zhang and Ping Wang and Chandan K. Reddy", title = "Corpus-level and Concept-based Explanations for Interpretable Document Classification", journal = j-TKDD, volume = "16", number = "3", pages = "48:1--48:17", month = jun, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3477539", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Dec 10 11:04:18 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3477539", abstract = "Using attention weights to identify information that is important for models' decision making is a popular approach to interpret attention-based neural networks. This is commonly realized in practice through the generation of a heat-map for every single \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "48", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Feng:2022:CAS, author = "Jie Feng and Yong Li and Ziqian Lin and Can Rong and Funing Sun and Diansheng Guo and Depeng Jin", title = "Context-aware Spatial-Temporal Neural Network for Citywide Crowd Flow Prediction via Modeling Long-range Spatial Dependency", journal = j-TKDD, volume = "16", number = "3", pages = "49:1--49:21", month = jun, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3477577", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Dec 10 11:04:18 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3477577", abstract = "Crowd flow prediction is of great importance in a wide range of applications from urban planning, traffic control to public safety. It aims at predicting the inflow (the traffic of crowds entering a region in a given time interval) and outflow (the \ldots{})", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "49", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhou:2022:UAN, author = "Yang Zhou and Jiaxiang Ren and Ruoming Jin and Zijie Zhang and Jingyi Zheng and Zhe Jiang and Da Yan and Dejing Dou", title = "Unsupervised Adversarial Network Alignment with Reinforcement Learning", journal = j-TKDD, volume = "16", number = "3", pages = "50:1--50:29", month = jun, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3477050", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Dec 10 11:04:18 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3477050", abstract = "Network alignment, which aims at learning a matching between the same entities across multiple information networks, often suffers challenges from feature inconsistency, high-dimensional features, to unstable alignment results. This article presents a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "50", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wu:2022:NMN, author = "Youxi Wu and Lanfang Luo and Yan Li and Lei Guo and Philippe Fournier-Viger and Xingquan Zhu and Xindong Wu", title = "{NTP-Miner}: Nonoverlapping Three-Way Sequential Pattern Mining", journal = j-TKDD, volume = "16", number = "3", pages = "51:1--51:21", month = jun, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3480245", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Dec 10 11:04:18 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3480245", abstract = "Nonoverlapping sequential pattern mining is an important type of sequential pattern mining (SPM) with gap constraints, which not only can reveal interesting patterns to users but also can effectively reduce the search space using the Apriori (anti-\ldots{})", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "51", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Jiang:2022:NPO, author = "Yuanchun Jiang and Ruicheng Liang and Ji Zhang and Jianshan Sun and Yezheng Liu and Yang Qian", title = "Network Public Opinion Detection During the Coronavirus Pandemic: a Short-Text Relational Topic Model", journal = j-TKDD, volume = "16", number = "3", pages = "52:1--52:27", month = jun, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3480246", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Dec 10 11:04:18 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3480246", abstract = "Online social media provides rich and varied information reflecting the significant concerns of the public during the coronavirus pandemic. 
Analyzing what the public is concerned with from social media information can support policy-makers to maintain the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "52", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Sun:2022:GCI, author = "Heli Sun and Yang Li and Bing Lv and Wujie Yan and Liang He and Shaojie Qiao and Jianbin Huang", title = "{Graph Community Infomax}", journal = j-TKDD, volume = "16", number = "3", pages = "53:1--53:21", month = jun, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3480244", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Dec 10 11:04:18 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3480244", abstract = "Graph representation learning aims at learning low-dimension representations for nodes in graphs, and has been proven very useful in several downstream tasks. In this article, we propose a new model, Graph Community Infomax (GCI), that can adversarial \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "53", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2022:SGQ, author = "Guliu Liu and Lei Li and Guanfeng Liu and Xindong Wu", title = "Social Group Query Based on Multi-Fuzzy-Constrained Strong Simulation", journal = j-TKDD, volume = "16", number = "3", pages = "54:1--54:27", month = jun, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3481640", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Dec 10 11:04:18 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3481640", abstract = "Traditional social group analysis mostly uses interaction models, event models, or other social network analysis methods to identify and distinguish groups. This type of method can divide social participants into different groups based on their geographic \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "54", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liang:2022:NFB, author = "Shangsong Liang and Zhuo Ouyang and Zaiqiao Meng", title = "A Normalizing Flow-Based Co-Embedding Model for Attributed Networks", journal = j-TKDD, volume = "16", number = "3", pages = "55:1--55:31", month = jun, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3477049", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Dec 10 11:04:18 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3477049", abstract = "Network embedding is a technique that aims at inferring the low-dimensional representations of nodes in a semantic space. 
In this article, we study the problem of inferring the low-dimensional representations of both nodes and attributes for attributed \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "55", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Xu:2022:TAG, author = "Yonghui Xu and Shengjie Sun and Huiguo Zhang and Chang'an Yi and Yuan Miao and Dong Yang and Xiaonan Meng and Yi Hu and Ke Wang and Huaqing Min and Hengjie Song and Chuanyan Miao", title = "Time-Aware Graph Embedding: a Temporal Smoothness and Task-Oriented Approach", journal = j-TKDD, volume = "16", number = "3", pages = "56:1--56:23", month = jun, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3480243", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Dec 10 11:04:18 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3480243", abstract = "Knowledge graph embedding, which aims at learning the low-dimensional representations of entities and relationships, has attracted considerable research efforts recently. However, most knowledge graph embedding methods focus on the structural \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "56", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Sowah:2022:HEH, author = "Robert A. Sowah and Bernard Kuditchar and Godfrey A. Mills and Amevi Acakpovi and Raphael A. 
Twum and Gifty Buah and Robert Agboyi", title = "{HCBST}: an Efficient Hybrid Sampling Technique for Class Imbalance Problems", journal = j-TKDD, volume = "16", number = "3", pages = "57:1--57:37", month = jun, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3488280", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Dec 10 11:04:18 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3488280", abstract = "Class imbalance problem is prevalent in many real-world domains. It has become an active area of research. In binary classification problems, imbalance learning refers to learning from a dataset with a high degree of skewness to the negative class. This \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "57", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Jin:2022:TUE, author = "Junchen Jin and Mark Heimann and Di Jin and Danai Koutra", title = "Toward Understanding and Evaluating Structural Node Embeddings", journal = j-TKDD, volume = "16", number = "3", pages = "58:1--58:32", month = jun, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3481639", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Dec 10 11:04:18 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3481639", abstract = "While most network embedding techniques model the proximity between nodes in a network, recently there has been significant interest in structural embeddings that are based on node equivalences, a notion rooted in sociology: equivalences or positions are \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "58", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Guo:2022:DFE, author = "Mengzhuo Guo and Zhongzhi Xu and Qingpeng Zhang and Xiuwu Liao and Jiapeng Liu", title = "Deciphering Feature Effects on Decision-Making in Ordinal Regression Problems: an Explainable Ordinal Factorization Model", journal = j-TKDD, volume = "16", number = "3", pages = "59:1--59:26", month = jun, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3487048", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Dec 10 11:04:18 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3487048", abstract = "Ordinal regression predicts the objects' labels that exhibit a natural ordering, which is vital to decision-making problems such as credit scoring and clinical diagnosis. In these problems, the ability to explain how the individual features and their \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "59", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Lin:2022:SMH, author = "Jerry Chun-Wei Lin and Youcef Djenouri and Gautam Srivastava and Yuanfa Li and Philip S. 
Yu", title = "Scalable Mining of High-Utility Sequential Patterns With Three-Tier {MapReduce} Model", journal = j-TKDD, volume = "16", number = "3", pages = "60:1--60:26", month = jun, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3487046", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Dec 10 11:04:18 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3487046", abstract = "High-utility sequential pattern mining (HUSPM) is a hot research topic in recent decades since it combines both sequential and utility properties to reveal more information and knowledge rather than the traditional frequent itemset mining or sequential \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "60", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Gupta:2022:CDL, author = "Manish Gupta and Puneet Agrawal", title = "Compression of Deep Learning Models for Text: a Survey", journal = j-TKDD, volume = "16", number = "4", pages = "61:1--61:55", month = aug, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3487045", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Jan 14 06:33:11 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3487045", abstract = "In recent years, the fields of natural language processing (NLP) and information retrieval (IR) have made tremendous progress thanks to deep learning models like Recurrent Neural Networks (RNNs), Gated Recurrent Units (GRUs) and Long Short-Term Memory \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "61", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2022:FMM, author = "Chang Liu and Jie Yan and Feiyue Guo and Min Guo", title = "Forecasting the Market with Machine Learning Algorithms: an Application of {NMC-BERT-LSTM-DQN-X} Algorithm in Quantitative Trading", journal = j-TKDD, volume = "16", number = "4", pages = "62:1--62:22", month = aug, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3488378", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Jan 14 06:33:11 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3488378", abstract = "Although machine learning (ML) algorithms have been widely used in forecasting the trend of stock market indices, they failed to consider the following crucial aspects for market forecasting: (1) that investors' emotions and attitudes toward future market \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "62", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2022:RRH, author = "Danlu Liu and Yu Li and William Baskett and Dan Lin and Chi-Ren Shyu", title = "{RHPTree}---Risk Hierarchical Pattern Tree for Scalable Long Pattern Mining", journal = j-TKDD, volume = "16", number = "4", pages = "63:1--63:33", month = aug, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3488380", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Jan 14 06:33:11 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3488380", abstract = "Risk patterns are crucial in biomedical research and have served as an important factor in precision health and disease prevention.
Despite recent development in parallel and high-performance computing, existing risk pattern mining methods still struggle \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "63", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ma:2022:MIF, author = "Muyang Ma and Pengjie Ren and Zhumin Chen and Zhaochun Ren and Lifan Zhao and Peiyu Liu and Jun Ma and Maarten de Rijke", title = "Mixed Information Flow for Cross-Domain Sequential Recommendations", journal = j-TKDD, volume = "16", number = "4", pages = "64:1--64:32", month = aug, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3487331", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Jan 14 06:33:11 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3487331", abstract = "Cross-domain sequential recommendation is the task of predict the next item that the user is most likely to interact with based on past sequential behavior from multiple domains. One of the key challenges in cross-domain sequential recommendation is to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "64", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Fu:2022:TTR, author = "Zhe Fu and Li Yu and Xi Niu", title = "{TRACE}: Travel Reinforcement Recommendation Based on Location-Aware Context Extraction", journal = j-TKDD, volume = "16", number = "4", pages = "65:1--65:22", month = aug, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3487047", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Jan 14 06:33:11 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3487047", abstract = "As the popularity of online travel platforms increases, users tend to make ad-hoc decisions on places to visit rather than preparing the detailed tour plans in advance. Under the situation of timeliness and uncertainty of users' demand, how to integrate \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "65", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yu:2022:CFS, author = "Kui Yu and Yajing Yang and Wei Ding", title = "Causal Feature Selection with Missing Data", journal = j-TKDD, volume = "16", number = "4", pages = "66:1--66:24", month = aug, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3488055", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Jan 14 06:33:11 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3488055", abstract = "Causal feature selection aims at learning the Markov blanket (MB) of a class variable for feature selection. 
The MB of a class variable implies the local causal structure among the class variable and its MB and all other features are probabilistically \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "66", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Gao:2022:TES, author = "Fei Gao and Jiada Li and Yisu Ge and Jianwen Shao and Shufang Lu and Libo Weng", title = "A Trajectory Evaluator by Sub-tracks for Detecting {VOT}-based Anomalous Trajectory", journal = j-TKDD, volume = "16", number = "4", pages = "67:1--67:19", month = aug, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3490032", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Jan 14 06:33:11 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3490032", abstract = "With the popularization of visual object tracking (VOT), more and more trajectory data are obtained and have begun to gain widespread attention in the fields of mobile robots, intelligent video surveillance, and the like. How to clean the anomalous \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "67", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Jafariakinabad:2022:SSR, author = "Fereshteh Jafariakinabad and Kien A. 
Hua", title = "A Self-Supervised Representation Learning of Sentence Structure for Authorship Attribution", journal = j-TKDD, volume = "16", number = "4", pages = "68:1--68:16", month = aug, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3491203", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Jan 14 06:33:11 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3491203", abstract = "The syntactic structure of sentences in a document substantially informs about its authorial writing style. Sentence representation learning has been widely explored in recent years and it has been shown that it improves the generalization of different \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "68", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Xu:2022:PPM, author = "Honghui Xu and Zhipeng Cai and Wei Li", title = "Privacy-Preserving Mechanisms for Multi-Label Image Recognition", journal = j-TKDD, volume = "16", number = "4", pages = "69:1--69:21", month = aug, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3491231", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Jan 14 06:33:11 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3491231", abstract = "Multi-label image recognition has been an indispensable fundamental component for many real computer vision applications. However, a severe threat of privacy leakage in multi-label image recognition has been overlooked by existing studies. To fill this \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "69", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Abulaish:2022:DSK, author = "Muhammad Abulaish and Mohd Fazil and Mohammed J. Zaki", title = "Domain-Specific Keyword Extraction Using Joint Modeling of Local and Global Contextual Semantics", journal = j-TKDD, volume = "16", number = "4", pages = "70:1--70:30", month = aug, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3494560", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Jan 14 06:33:11 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3494560", abstract = "Domain-specific keyword extraction is a vital task in the field of text mining. There are various research tasks, such as spam e-mail classification, abusive language detection, sentiment analysis, and emotion mining, where a set of domain-specific \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "70", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yuan:2022:AMS, author = "Mu Yuan and Lan Zhang and Xiang-Yang Li and Lin-Zhuo Yang and Hui Xiong", title = "Adaptive Model Scheduling for Resource-efficient Data Labeling", journal = j-TKDD, volume = "16", number = "4", pages = "71:1--71:22", month = aug, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3494559", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Jan 14 06:33:11 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3494559", abstract = "Labeling data (e.g., labeling the people, objects, actions, and scene in images) comprehensively and efficiently is a widely needed but challenging task. 
Numerous models were proposed to label various data and many approaches were designed to enhance the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "71", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2022:DEL, author = "Min-Ling Zhang and Jing-Han Wu and Wei-Xuan Bao", title = "Disambiguation Enabled Linear Discriminant Analysis for Partial Label Dimensionality Reduction", journal = j-TKDD, volume = "16", number = "4", pages = "72:1--72:18", month = aug, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3494565", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Jan 14 06:33:11 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3494565", abstract = "As an emerging weakly supervised learning framework, partial label learning considers inaccurate supervision where each training example is associated with multiple candidate labels among which only one is valid. In this article, a first attempt toward \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "72", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Huang:2022:AMP, author = "Chenji Huang and Yixiang Fang and Xuemin Lin and Xin Cao and Wenjie Zhang", title = "{ABLE}: Meta-Path Prediction in Heterogeneous Information Networks", journal = j-TKDD, volume = "16", number = "4", pages = "73:1--73:21", month = aug, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3494558", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Jan 14 06:33:11 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3494558", abstract = "Given a heterogeneous information network (HIN) H, a head node h, a meta-path P, and a tail node t, the meta-path prediction aims at predicting whether h can be linked to t by an instance of P. Most existing solutions either require predefined meta-paths, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "73", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yuan:2022:AIC, author = "Junkun Yuan and Anpeng Wu and Kun Kuang and Bo Li and Runze Wu and Fei Wu and Lanfen Lin", title = "{Auto IV}: Counterfactual Prediction via Automatic Instrumental Variable Decomposition", journal = j-TKDD, volume = "16", number = "4", pages = "74:1--74:20", month = aug, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3494568", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Jan 14 06:33:11 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3494568", abstract = "Instrumental variables (IVs), sources of treatment randomization that are conditionally independent of the outcome, play an important role in causal inference with unobserved confounders. However, the existing IV-based counterfactual prediction methods \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "74", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Bhatia:2022:RTA, author = "Siddharth Bhatia and Rui Liu and Bryan Hooi and Minji Yoon and Kijung Shin and Christos Faloutsos", title = "Real-Time Anomaly Detection in Edge Streams", journal = j-TKDD, volume = "16", number = "4", pages = "75:1--75:22", month = aug, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3494564", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Jan 14 06:33:11 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3494564", abstract = "Given a stream of graph edges from a dynamic graph, how can we assign anomaly scores to edges in an online manner, for the purpose of detecting unusual behavior, using constant time and memory? Existing approaches aim to detect individually surprising \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "75", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Sheshbolouki:2022:SBA, author = "Aida Sheshbolouki and M. Tamer {\"O}zsu", title = "{sGrapp}: Butterfly Approximation in Streaming Graphs", journal = j-TKDD, volume = "16", number = "4", pages = "76:1--76:43", month = aug, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3495011", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Jan 14 06:33:11 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3495011", abstract = "We study the fundamental problem of butterfly (i.e., (2,2)-bicliques) counting in bipartite streaming graphs. Similar to triangles in unipartite graphs, enumerating butterflies is crucial in understanding the structure of bipartite graphs.
This benefits \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "76", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wu:2022:MGL, author = "Hanrui Wu and Michael K. Ng", title = "Multiple Graphs and Low-Rank Embedding for Multi-Source Heterogeneous Domain Adaptation", journal = j-TKDD, volume = "16", number = "4", pages = "77:1--77:25", month = aug, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3492804", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Jan 14 06:33:11 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3492804", abstract = "Multi-source domain adaptation is a challenging topic in transfer learning, especially when the data of each domain are represented by different kinds of features, i.e., Multi-source Heterogeneous Domain Adaptation (MHDA). It is important to take \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "77", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Prokhorenkova:2022:WLM, author = "Liudmila Prokhorenkova and Alexey Tikhonov and Nelly Litvak", title = "When Less Is More: Systematic Analysis of Cascade-Based Community Detection", journal = j-TKDD, volume = "16", number = "4", pages = "78:1--78:22", month = aug, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3494563", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Jan 14 06:33:11 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3494563", abstract = "Information diffusion, spreading of infectious diseases, and spreading of rumors are fundamental processes occurring in real-life networks. 
In many practical cases, one can observe when nodes become infected, but the underlying network, over which a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "78", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yang:2022:DTA, author = "Xu Yang and Chao Song and Mengdi Yu and Jiqing Gu and Ming Liu", title = "Distributed Triangle Approximately Counting Algorithms in Simple Graph Stream", journal = j-TKDD, volume = "16", number = "4", pages = "79:1--79:43", month = aug, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3494562", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Jan 14 06:33:11 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3494562", abstract = "Recently, the counting algorithm of local topology structures, such as triangles, has been widely used in social network analysis, recommendation systems, user portraits and other fields. At present, the problem of counting global and local triangles in a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "79", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wu:2022:HCN, author = "Hanrui Wu and Michael K. 
Ng", title = "Hypergraph Convolution on Nodes-Hyperedges Network for Semi-Supervised Node Classification", journal = j-TKDD, volume = "16", number = "4", pages = "80:1--80:19", month = aug, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3494567", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Jan 14 06:33:11 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3494567", abstract = "Hypergraphs have shown great power in representing high-order relations among entities, and lots of hypergraph-based deep learning methods have been proposed to learn informative data representations for the node classification problem. However, most of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "80", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Shen:2022:MTM, author = "Yanyan Shen and Baoyuan Ou and Ranzhen Li", title = "{MBN}: Towards Multi-Behavior Sequence Modeling for Next Basket Recommendation", journal = j-TKDD, volume = "16", number = "5", pages = "81:1--81:23", month = oct, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3497748", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed May 25 07:43:38 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3497748", abstract = "Next basket recommendation aims at predicting the next set of items that a user would likely purchase together, which plays an important role in e-commerce platforms. Unlike conventional item recommendation, the next basket recommendation focuses on \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "81", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ke:2022:MRG, author = "Xiangyu Ke and Arijit Khan and Francesco Bonchi", title = "Multi-relation Graph Summarization", journal = j-TKDD, volume = "16", number = "5", pages = "82:1--82:30", month = oct, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3494561", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed May 25 07:43:38 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3494561", abstract = "Graph summarization is beneficial in a wide range of applications, such as visualization, interactive and exploratory analysis, approximate query processing, reducing the on-disk storage footprint, and graph processing in modern hardware. However, the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "82", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chen:2022:OLB, author = "Weirong Chen and Jiaqi Zheng and Haoyu Yu and Guihai Chen and Yixin Chen and Dongsheng Li", title = "Online Learning Bipartite Matching with Non-stationary Distributions", journal = j-TKDD, volume = "16", number = "5", pages = "83:1--83:22", month = oct, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3502734", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed May 25 07:43:38 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3502734", abstract = "Online bipartite matching has attracted wide interest since it can successfully model the popular online car-hailing problem and sharing economy. Existing works consider this problem under either adversary setting or i.i.d. setting. 
The former is too \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "83",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Ihou:2022:SVO,
  author =       "Koffi Eddy Ihou and Manar Amayri and Nizar Bouguila",
  title =        "Stochastic Variational Optimization of a Hierarchical {Dirichlet} Process Latent Beta-{Liouville} Topic Model",
  journal =      j-TKDD,
  volume =       "16",
  number =       "5",
  pages =        "84:1--84:48",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3502727",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 25 07:43:38 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3502727",
  abstract =     "In topic models, collections are organized as documents where they arise as mixtures over latent clusters called topics. A topic is a distribution over the vocabulary. In large-scale applications, parametric or finite topic mixture models such as LDA \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "84",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Davvetas:2022:ETL,
  author =       "Athanasios Davvetas and Iraklis A.
Klampanos and Spiros Skiadopoulos and Vangelis Karkaletsis",
  title =        "Evidence Transfer: Learning Improved Representations According to External Heterogeneous Task Outcomes",
  journal =      j-TKDD,
  volume =       "16",
  number =       "5",
  pages =        "85:1--85:22",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3502732",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 25 07:43:38 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3502732",
  abstract =     "Unsupervised representation learning tends to produce generic and reusable latent representations. However, these representations can often miss high-level features or semantic information, since they only observe the implicit properties of the dataset. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "85",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Varde:2022:CES,
  author =       "Aparna S. Varde",
  title =        "Computational Estimation by Scientific Data Mining with Classical Methods to Automate Learning Strategies of Scientists",
  journal =      j-TKDD,
  volume =       "16",
  number =       "5",
  pages =        "86:1--86:52",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3502736",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 25 07:43:38 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3502736",
  abstract =     "Experimental results are often plotted as 2-dimensional graphical plots (aka graphs) in scientific domains depicting dependent versus independent variables to aid visual analysis of processes. Repeatedly performing laboratory experiments consumes \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov.
Data",
  articleno =    "86",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Zhou:2022:OSS,
  author =       "Peng Zhou and Shu Zhao and Yuanting Yan and Xindong Wu",
  title =        "Online Scalable Streaming Feature Selection via Dynamic Decision",
  journal =      j-TKDD,
  volume =       "16",
  number =       "5",
  pages =        "87:1--87:20",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3502737",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 25 07:43:38 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3502737",
  abstract =     "Feature selection is one of the core concepts in machine learning, which hugely impacts the model's performance. For some real-world applications, features may exist in a stream mode that arrives one by one over time, while we cannot know the exact number \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "87",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Pooja:2022:EHO,
  author =       "Km Pooja and Samrat Mondal and Joydeep Chandra",
  title =        "Exploiting Higher Order Multi-dimensional Relationships with Self-attention for Author Name Disambiguation",
  journal =      j-TKDD,
  volume =       "16",
  number =       "5",
  pages =        "88:1--88:23",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3502730",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 25 07:43:38 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3502730",
  abstract =     "Name ambiguity is a prevalent problem in scholarly publications due to the unprecedented growth of digital libraries and number of researchers. An author is identified by their name in the absence of a unique identifier.
The documents of an author are \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "88",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Pei:2022:BHB,
  author =       "Shuyu Pei and Kun Xie and Xin Wang and Gaogang Xie and Kenli Li and Wei Li and Yanbiao Li and Jigang Wen",
  title =        "{B$_h$BF}: a {Bloom} Filter Using {B$_h$} Sequences for Multi-set Membership Query",
  journal =      j-TKDD,
  volume =       "16",
  number =       "5",
  pages =        "89:1--89:26",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3502735",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 25 07:43:38 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3502735",
  abstract =     "Multi-set membership query is a fundamental issue for network functions such as packet processing and state machines monitoring. Given the rigid query speed and memory requirements, it would be promising if a multi-set query algorithm can be designed \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov.
Data",
  articleno =    "89",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Chen:2022:DED,
  author =       "Ling Chen and Hongyu Shi",
  title =        "{DexDeepFM}: Ensemble Diversity Enhanced Extreme Deep Factorization Machine Model",
  journal =      j-TKDD,
  volume =       "16",
  number =       "5",
  pages =        "90:1--90:17",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3505272",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 25 07:43:38 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3505272",
  abstract =     "Predicting user positive response (e.g., purchases and clicks) probability is a critical task in Web applications. To identify predictive features from raw data, the state-of-the-art extreme deep factorization machine model (xDeepFM) introduces a new \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "90",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Singh:2022:MLD,
  author =       "Shikha Singh and Emilie Chouzenoux and Giovanni Chierchia and Angshul Majumdar",
  title =        "Multi-label Deep Convolutional Transform Learning for Non-intrusive Load Monitoring",
  journal =      j-TKDD,
  volume =       "16",
  number =       "5",
  pages =        "91:1--91:6",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3502729",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 25 07:43:38 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3502729",
  abstract =     "The objective of this letter is to propose a novel computational method to learn the state of an appliance (ON / OFF) given the aggregate power consumption recorded by the smart-meter.
We formulate a multi-label classification problem where the classes \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "91",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Sato:2022:CTG,
  author =       "Ryoma Sato and Makoto Yamada and Hisashi Kashima",
  title =        "Constant Time Graph Neural Networks",
  journal =      j-TKDD,
  volume =       "16",
  number =       "5",
  pages =        "92:1--92:31",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3502733",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 25 07:43:38 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3502733",
  abstract =     "The recent advancements in graph neural networks (GNNs) have led to state-of-the-art performances in various applications, including chemo-informatics, question-answering systems, and recommender systems. However, scaling up these methods to huge graphs, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "92",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Ling:2022:PAP,
  author =       "Zhaolong Ling and Kui Yu and Lin Liu and Jiuyong Li and Yiwen Zhang and Xindong Wu",
  title =        "{PSL}: an Algorithm for Partial {Bayesian} Network Structure Learning",
  journal =      j-TKDD,
  volume =       "16",
  number =       "5",
  pages =        "93:1--93:25",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3508071",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 25 07:43:38 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3508071",
  abstract =     "Learning partial Bayesian network (BN) structure is an interesting and challenging problem.
In this challenge, it is computationally expensive to use global BN structure learning algorithms, while only one part of a BN structure is interesting, local BN \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "93",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

%%% NOTE: the first author of the next entry was originally recorded as
%%% "Ms Promila Sharma".  The honorific is left out of the author field
%%% because BibTeX would otherwise parse "Ms" as part of the given name
%%% (and abbreviating styles would print "M. P. Sharma").

@Article{Sharma:2022:IDA,
  author =       "Promila Sharma and Uma Meena and Girish Kumar Sharma",
  title =        "Intelligent Data Analysis using Optimized Support Vector Machine Based Data Mining Approach for Tourism Industry",
  journal =      j-TKDD,
  volume =       "16",
  number =       "5",
  pages =        "94:1--94:20",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3494566",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 25 07:43:38 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3494566",
  abstract =     "Data analysis involves the deployment of sophisticated approaches from data mining methods, information theory, and artificial intelligence in various fields like tourism, hospitality, and so on for the extraction of knowledge from the gathered and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov.
Data",
  articleno =    "94",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Huai:2022:RML,
  author =       "Mengdi Huai and Tianhang Zheng and Chenglin Miao and Liuyi Yao and Aidong Zhang",
  title =        "On the Robustness of Metric Learning: an Adversarial Perspective",
  journal =      j-TKDD,
  volume =       "16",
  number =       "5",
  pages =        "95:1--95:25",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3502726",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 25 07:43:38 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3502726",
  abstract =     "Metric learning aims at automatically learning a distance metric from data so that the precise similarity between data instances can be faithfully reflected, and its importance has long been recognized in many fields. An implicit assumption in existing \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "95",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Qiu:2022:GNN,
  author =       "Zhaopeng Qiu and Yunfan Hu and Xian Wu",
  title =        "Graph Neural News Recommendation with User Existing and Potential Interest Modeling",
  journal =      j-TKDD,
  volume =       "16",
  number =       "5",
  pages =        "96:1--96:17",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3511708",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 25 07:43:38 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3511708",
  abstract =     "Personalized news recommendations can alleviate the information overload problem. To enable personalized recommendation, one critical step is to learn a comprehensive user representation to model her/his interests.
Many existing works learn user \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "96",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Goel:2022:QIP,
  author =       "Kanika Goel and Sander J. J. Leemans and Niels Martin and Moe T. Wynn",
  title =        "Quality-Informed Process Mining: a Case for Standardised Data Quality Annotations",
  journal =      j-TKDD,
  volume =       "16",
  number =       "5",
  pages =        "97:1--97:47",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3511707",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 25 07:43:38 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3511707",
  abstract =     "Real-life event logs, reflecting the actual executions of complex business processes, are faced with numerous data quality issues. Extensive data sanity checks and pre-processing are usually needed before historical data can be used as input to obtain \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "97",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Liu:2022:WWW,
  author =       "Hao Liu and Qingyu Guo and Hengshu Zhu and Fuzhen Zhuang and Shenwen Yang and Dejing Dou and Hui Xiong",
  title =        "Who will Win the Data Science Competition?
{Insights} from {KDD Cup 2019} and Beyond",
  journal =      j-TKDD,
  volume =       "16",
  number =       "5",
  pages =        "98:1--98:24",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3511896",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 25 07:43:38 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3511896",
  abstract =     "Data science competitions are becoming increasingly popular for enterprises collecting advanced innovative solutions and allowing contestants to sharpen their data science skills. Most existing studies about data science competitions have a focus on \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "98",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Oliveira:2022:AMT,
  author =       "Saullo H. G. Oliveira and Andr{\'e} R. Gon{\c{c}}alves and Fernando J. {Von Zuben}",
  title =        "Asymmetric Multi-Task Learning with Local Transference",
  journal =      j-TKDD,
  volume =       "16",
  number =       "5",
  pages =        "99:1--99:30",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3514252",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 25 07:43:38 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3514252",
  abstract =     "In this article, we present the Group Asymmetric Multi-Task Learning (GAMTL) algorithm that automatically learns from data how tasks transfer information among themselves at the level of a subset of features. In practice, for each group of features GAMTL \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov.
Data",
  articleno =    "99",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Zhong:2022:CFC,
  author =       "Sheng Zhong and Vinicius M. A. Souza and Abdullah Mueen",
  title =        "Combining Filtering and Cross-Correlation Efficiently for Streaming Time Series",
  journal =      j-TKDD,
  volume =       "16",
  number =       "5",
  pages =        "100:1--100:24",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3502738",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 25 07:43:38 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3502738",
  abstract =     "Monitoring systems have hundreds or thousands of distributed sensors gathering and transmitting real-time streaming data. The early detection of events in these systems, such as an earthquake in a seismic monitoring system, is the base for essential tasks \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "100",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Jirina:2022:DFO,
  author =       "Marcel Jirina and Said Krayem",
  title =        "The Distance Function Optimization for the Near Neighbors-Based Classifiers",
  journal =      j-TKDD,
  volume =       "16",
  number =       "6",
  pages =        "101:1--101:??",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3434769",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Nov 16 08:20:02 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3434769",
  abstract =     "Based on the analysis of conditions for a good distance function we found four rules that should be fulfilled. Then, we introduce two new distance functions, a metric and a pseudometric one.
We have tested how they fit for distance-based classifiers, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "101",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Tey:2022:MLB,
  author =       "Fu Jie Tey and Tin-Yu Wu and Jiann-Liang Chen",
  title =        "Machine Learning-based Short-term Rainfall Prediction from Sky Data",
  journal =      j-TKDD,
  volume =       "16",
  number =       "6",
  pages =        "102:1--102:??",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3502731",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Nov 16 08:20:02 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3502731",
  abstract =     "To predict rainfall, our proposed model architecture combines the Convolutional Neural Network (CNN), which uses the ResNet-152 pre-training model, with the Recurrent Neural Network (RNN), which uses the Long Short-term Memory Network (LSTM) layer, for \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "102",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Mahmoud:2022:MOL,
  author =       "Reem A. Mahmoud and Hazem Hajj",
  title =        "Multi-objective Learning to Overcome Catastrophic Forgetting in Time-series Applications",
  journal =      j-TKDD,
  volume =       "16",
  number =       "6",
  pages =        "103:1--103:??",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3502728",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Nov 16 08:20:02 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3502728",
  abstract =     "One key objective of artificial intelligence involves the continuous adaptation of machine learning models to new tasks.
This branch of continual learning is also referred to as lifelong learning (LL), where a major challenge is to minimize catastrophic \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "103",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wang:2022:GES,
  author =       "Zhaobo Wang and Yanmin Zhu and Qiaomei Zhang and Haobing Liu and Chunyang Wang and Tong Liu",
  title =        "Graph-Enhanced Spatial-Temporal Network for Next {POI} Recommendation",
  journal =      j-TKDD,
  volume =       "16",
  number =       "6",
  pages =        "104:1--104:??",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3513092",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Nov 16 08:20:02 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3513092",
  abstract =     "The task of next Point-of-Interest (POI) recommendation aims at recommending a list of POIs for a user to visit at the next timestamp based on his/her previous interactions, which is valuable for both location-based service providers and users. Recent \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "104",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Tipirneni:2022:SST,
  author =       "Sindhu Tipirneni and Chandan K.
Reddy",
  title =        "Self-Supervised Transformer for Sparse and Irregularly Sampled Multivariate Clinical Time-Series",
  journal =      j-TKDD,
  volume =       "16",
  number =       "6",
  pages =        "105:1--105:??",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3516367",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Nov 16 08:20:02 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3516367",
  abstract =     "Multivariate time-series data are frequently observed in critical care settings and are typically characterized by sparsity (missing information) and irregular time intervals. Existing approaches for learning representations in this domain handle these \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "105",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Gu:2022:IFS,
  author =       "Shilin Gu and Yuhua Qian and Chenping Hou",
  title =        "Incremental Feature Spaces Learning with Label Scarcity",
  journal =      j-TKDD,
  volume =       "16",
  number =       "6",
  pages =        "106:1--106:??",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3516368",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Nov 16 08:20:02 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3516368",
  abstract =     "Recently, learning and mining from data streams with incremental feature spaces have attracted extensive attention, where data may dynamically expand over time in both volume and feature dimensions. Existing approaches usually assume that the incoming \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov.
Data",
  articleno =    "106",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Li:2022:DME,
  author =       "Zhe Li and Chunhua Sun and Chunli Liu and Xiayu Chen and Meng Wang and Yezheng Liu",
  title =        "Dual-{MGAN}: an Efficient Approach for Semi-supervised Outlier Detection with Few Identified Anomalies",
  journal =      j-TKDD,
  volume =       "16",
  number =       "6",
  pages =        "107:1--107:??",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3522690",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Nov 16 08:20:02 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3522690",
  abstract =     "Outlier detection is an important task in data mining, and many technologies for it have been explored in various applications. However, owing to the default assumption that outliers are not concentrated, unsupervised outlier detection may not correctly \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "107",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wang:2022:NNE,
  author =       "Yu Wang and Hanghang Tong and Ziye Zhu and Yun Li",
  title =        "Nested Named Entity Recognition: a Survey",
  journal =      j-TKDD,
  volume =       "16",
  number =       "6",
  pages =        "108:1--108:??",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3522593",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Nov 16 08:20:02 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3522593",
  abstract =     "With the rapid development of text mining, many studies observe that text generally contains a variety of implicit information, and it is important to develop techniques for extracting such information.
Named Entity Recognition (NER), the first step of \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "108",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Xiao:2022:TQI,
  author =       "Houping Xiao and Shiyu Wang",
  title =        "Toward Quality of Information Aware Distributed Machine Learning",
  journal =      j-TKDD,
  volume =       "16",
  number =       "6",
  pages =        "109:1--109:??",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3522591",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Nov 16 08:20:02 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3522591",
  abstract =     "In the era of big data, data are usually distributed across numerous connected computing and storage units (i.e., nodes or workers). Under such an environment, many machine learning problems can be reformulated as a consensus optimization problem, which \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "109",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Cai:2022:ANI,
  author =       "Jianghui Cai and Yuqing Yang and Haifeng Yang and Xujun Zhao and Jing Hao",
  title =        "{ARIS}: a Noise Insensitive Data Pre-Processing Scheme for Data Reduction Using Influence Space",
  journal =      j-TKDD,
  volume =       "16",
  number =       "6",
  pages =        "110:1--110:??",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3522592",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Nov 16 08:20:02 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3522592",
  abstract =     "The extensive growth of data quantity has posed many challenges to data analysis and retrieval.
Noise and redundancy are typical representatives of the above-mentioned challenges, which may reduce the reliability of analysis and retrieval results and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "110",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Zhang:2022:IBL,
  author =       "Xiaoying Zhang and Hong Xie and John C. S. Lui",
  title =        "Improving Bandit Learning Via Heterogeneous Information Networks: Algorithms and Applications",
  journal =      j-TKDD,
  volume =       "16",
  number =       "6",
  pages =        "111:1--111:??",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3522590",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Nov 16 08:20:02 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3522590",
  abstract =     "Contextual bandit serves as an invaluable tool to balance the exploration vs. exploitation tradeoff in various applications such as online recommendation. In many applications, heterogeneous information networks (HINs) provide rich side information for \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "111",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Desantis:2022:FBM,
  author =       "Derek Desantis and Erik Skau and Duc P.
Truong and Boian Alexandrov",
  title =        "Factorization of Binary Matrices: Rank Relations, Uniqueness and Model Selection of {Boolean} Decomposition",
  journal =      j-TKDD,
  volume =       "16",
  number =       "6",
  pages =        "112:1--112:??",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3522594",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Nov 16 08:20:02 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3522594",
  abstract =     "The application of binary matrices are numerous. Representing a matrix as a mixture of a small collection of latent vectors via low-rank decomposition is often seen as an advantageous method to interpret and analyze data. In this work, we examine the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "112",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Yang:2022:SWT,
  author =       "Xing Yang and Chen Zhang and Baihua Zheng",
  title =        "Segment-Wise Time-Varying Dynamic {Bayesian} Network with Graph Regularization",
  journal =      j-TKDD,
  volume =       "16",
  number =       "6",
  pages =        "113:1--113:??",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3522589",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Nov 16 08:20:02 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3522589",
  abstract =     "Time-varying dynamic Bayesian network (TVDBN) is essential for describing time-evolving directed conditional dependence structures in complex multivariate systems. In this article, we construct a TVDBN model, together with a score-based method for its \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov.
Data",
  articleno =    "113",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wang:2022:CSD,
  author =       "Shaowei Wang and Lingling Zhang and Xuan Luo and Yi Yang and Xin Hu and Tao Qin and Jun Liu",
  title =        "Computer Science Diagram Understanding with Topology Parsing",
  journal =      j-TKDD,
  volume =       "16",
  number =       "6",
  pages =        "114:1--114:??",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3522689",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Nov 16 08:20:02 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3522689",
  abstract =     "Diagram is a special form of visual expression for representing complex concepts, logic, and knowledge, which widely appears in educational scenes such as textbooks, blogs, and encyclopedias. Current research on diagrams preliminarily focuses on natural \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "114",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Jang:2022:FKS,
  author =       "Jun-Gi Jang and Chaeheum Park and Changwon Jang and Geonsoo Kim and U. Kang",
  title =        "Finding Key Structures in {MMORPG} Graph with Hierarchical Graph Summarization",
  journal =      j-TKDD,
  volume =       "16",
  number =       "6",
  pages =        "115:1--115:??",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3522691",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Nov 16 08:20:02 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3522691",
  abstract =     "What are the key structures existing in a large real-world MMORPG (Massively Multiplayer Online Role-Playing Game) graph?
How can we compactly summarize an MMORPG graph with hierarchical node labels, considering substructures at different levels of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "115", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhu:2022:PPN, author = "Nengjun Zhu and Jian Cao and Xinjiang Lu and Chuanren Liu and Hao Liu and Yanyan Li and Xiangfeng Luo and Hui Xiong", title = "Predicting a Person's Next Activity Region with a Dynamic Region-Relation-Aware Graph Neural Network", journal = j-TKDD, volume = "16", number = "6", pages = "116:1--116:??", month = dec, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3529091", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Nov 16 08:20:02 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3529091", abstract = "The understanding of people's inter-regional mobility behaviors, such as predicting the next activity region (AR) or uncovering the intentions for regional mobility, is of great value to public administration or business interests. While there are \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "116", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Xiong:2022:GDL, author = "Haoyi Xiong and Ruosi Wan and Jian Zhao and Zeyu Chen and Xingjian Li and Zhanxing Zhu and Jun Huan", title = "{GrOD}: Deep Learning with Gradients Orthogonal Decomposition for Knowledge Transfer, Distillation, and Adversarial Training", journal = j-TKDD, volume = "16", number = "6", pages = "117:1--117:??", month = dec, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3530836", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Nov 16 08:20:02 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3530836", abstract = "Regularization that incorporates the linear combination of empirical loss and explicit regularization terms as the loss function has been frequently used for many machine learning tasks. The explicit regularization term is designed in different types, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "117", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Benarous:2022:SLH, author = "Maya Benarous and Eran Toch and Irad Ben-gal", title = "Synthesis of Longitudinal Human Location Sequences: Balancing Utility and Privacy", journal = j-TKDD, volume = "16", number = "6", pages = "118:1--118:??", month = dec, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3529260", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Nov 16 08:20:02 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3529260", abstract = "People's location data are continuously tracked from various devices and sensors, enabling an ongoing analysis of sensitive information that can violate people's privacy and reveal confidential information. Synthetic data have been used to generate \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "118", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Coscia:2022:GEM, author = "Michele Coscia", title = "Generalized {Euclidean} Measure to Estimate Distances on Multilayer Networks", journal = j-TKDD, volume = "16", number = "6", pages = "119:1--119:??", month = dec, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3529396", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Nov 16 08:20:02 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3529396", abstract = "Estimating the distance covered by a spreading event on a network can lead to a better understanding of epidemics, economic growth, and human behavior. 
There are many methods solving this problem --- which has been called Node Vector Distance (NVD) --- for single \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "119", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yildiz:2022:SRR, author = "Ilkay Yildiz and Jennifer Dy and Deniz Erdogmus and Susan Ostmo and J. Peter Campbell and Michael F. Chiang and Stratis Ioannidis", title = "Spectral Ranking Regression", journal = j-TKDD, volume = "16", number = "6", pages = "120:1--120:??", month = dec, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3530693", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Nov 16 08:20:02 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3530693", abstract = "We study the problem of ranking regression, in which a dataset of rankings is used to learn Plackett-Luce scores as functions of sample features. We propose a novel spectral algorithm to accelerate learning in ranking regression. Our main technical \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov.
Data", articleno = "120", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2022:PDB, author = "Ziyue Li and Hao Yan and Fugee Tsung and Ke Zhang", title = "Profile Decomposition Based Hybrid Transfer Learning for Cold-Start Data Anomaly Detection", journal = j-TKDD, volume = "16", number = "6", pages = "121:1--121:??", month = dec, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3530990", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Nov 16 08:20:02 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3530990", abstract = "Anomaly detection is an essential task for quality management in smart manufacturing. An accurate data-driven detection method usually needs enough data and labels. However, in practice, there commonly exist newly set-up processes in manufacturing, and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "121", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Hu:2022:SDP, author = "Yue Hu and Ao Qu and Yanbing Wang and Daniel B. Work", title = "Streaming Data Preprocessing via Online Tensor Recovery for Large Environmental Sensor Networks", journal = j-TKDD, volume = "16", number = "6", pages = "122:1--122:??", month = dec, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3532189", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Nov 16 08:20:02 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3532189", abstract = "Measuring the built and natural environment at a fine-grained scale is now possible with low-cost urban environmental sensor networks. 
However, fine-grained city-scale data analysis is complicated by tedious data cleaning including removing outliers and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "122", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ma:2022:HFD, author = "Pengfei Ma and Youxi Wu and Yan Li and Lei Guo and He Jiang and Xingquan Zhu and Xindong Wu", title = "{HW-Forest}: Deep Forest with Hashing Screening and Window Screening", journal = j-TKDD, volume = "16", number = "6", pages = "123:1--123:??", month = dec, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3532193", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Nov 16 08:20:02 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3532193", abstract = "As a novel deep learning model, gcForest has been widely used in various applications. However, current multi-grained scanning of gcForest produces many redundant feature vectors, and this increases the time cost of the model. To screen out redundant \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "123", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Pellegrina:2022:MMC, author = "Leonardo Pellegrina and Cyrus Cousins and Fabio Vandin and Matteo Riondato", title = "{MCRapper}: {Monte-Carlo Rademacher} Averages for Poset Families and Approximate Pattern Mining", journal = j-TKDD, volume = "16", number = "6", pages = "124:1--124:??", month = dec, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3532187", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Nov 16 08:20:02 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3532187", abstract = "``I'm an MC still as honest'' --- Eminem, Rap God. We present MCRapper, an algorithm for efficient computation of Monte-Carlo Empirical Rademacher Averages (MCERA) for families of functions exhibiting poset (e.g., lattice) structure, such as those that arise in \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov.
Data", articleno = "124", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Bechini:2022:NBF, author = "Alessio Bechini and Alessandro Bondielli and Jos{\'e} Luis Corcuera B{\'a}rcena and Pietro Ducange and Francesco Marcelloni and Alessandro Renda", title = "A News-Based Framework for Uncovering and Tracking City Area Profiles: Assessment in {Covid-19} Setting", journal = j-TKDD, volume = "16", number = "6", pages = "125:1--125:??", month = dec, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3532186", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Nov 16 08:20:02 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3532186", abstract = "In the last years, there has been an ever-increasing interest in profiling various aspects of city life, especially in the context of smart cities. This interest has become even more relevant recently when we have realized how dramatic events, such as the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "125", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2023:MCS, author = "Ting-Yun Wang and Chiao-Ting Chen and Ju-Chun Huang and Szu-Hao Huang", title = "Modeling Cross-session Information with Multi-interest Graph Neural Networks for the Next-item Recommendation", journal = j-TKDD, volume = "17", number = "1", pages = "1:1--1:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3532192", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 16 07:36:49 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3532192", abstract = "Next-item recommendation involves predicting the next item of interest of a given user from their past behavior. Users tend to browse and purchase various items on e-commerce websites according to their varied interests and needs, as reflected in their \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "1", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2023:SRM, author = "Yu Wang and Chuan Chen and Jinrong Lai and Lele Fu and Yuren Zhou and Zibin Zheng", title = "A Self-Representation Method with Local Similarity Preserving for Fast Multi-View Outlier Detection", journal = j-TKDD, volume = "17", number = "1", pages = "2:1--2:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3532191", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 16 07:36:49 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3532191", abstract = "With the rapidly growing attention to multi-view data in recent years, multi-view outlier detection has become a rising field with intense research. These researches have made some success, but still exist some issues that need to be solved. First, many \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "2", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2023:LAA, author = "Ke Wang and Yanmin Zhu and Haobing Liu and Tianzi Zang and Chunyang Wang", title = "Learning Aspect-Aware High-Order Representations from Ratings and Reviews for Recommendation", journal = j-TKDD, volume = "17", number = "1", pages = "3:1--3:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3532188", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 16 07:36:49 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3532188", abstract = "Textual reviews contain rich semantic information that is useful for making better recommendation, as such semantic information may indicate more fine-grained preferences of users. Recent efforts make considerable improvement on recommendation by \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "3", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2023:IGL, author = "Zan Zhang and Lin Liu and Jiuyong Li and Xindong Wu", title = "Integrating Global and Local Feature Selection for Multi-Label Learning", journal = j-TKDD, volume = "17", number = "1", pages = "4:1--4:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3532190", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 16 07:36:49 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3532190", abstract = "Multi-label learning deals with the problem where an instance is associated with multiple labels simultaneously. Multi-label data is often of high dimensionality and has many noisy, irrelevant, and redundant features. 
As an important machine learning task, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "4", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chen:2023:EAM, author = "Xinye Chen and Stefan G{\"u}ttel", title = "An Efficient Aggregation Method for the Symbolic Representation of Temporal Data", journal = j-TKDD, volume = "17", number = "1", pages = "5:1--5:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3532622", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 16 07:36:49 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3532622", abstract = "Symbolic representations are a useful tool for the dimension reduction of temporal data, allowing for the efficient storage of and information retrieval from time series. They can also enhance the training of machine learning algorithms on time series \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "5", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2023:SSG, author = "Lei Li and Mengjiao Yan and Zhenchao Tao and Huanhuan Chen and Xindong Wu", title = "Semi-Supervised Graph Pattern Matching and Rematching for Expert Community Location", journal = j-TKDD, volume = "17", number = "1", pages = "6:1--6:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3532623", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 16 07:36:49 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3532623", abstract = "Graph pattern matching (GPM) is widely used in social network analysis, such as expert finding, social group query, and social position detection. Technically, GPM is to find matched subgraphs that meet the requirements of pattern graphs in big social \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "6", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Gu:2023:IMC, author = "Zhibin Gu and Songhe Feng", title = "Individuality Meets Commonality: a Unified Graph Learning Framework for Multi-View Clustering", journal = j-TKDD, volume = "17", number = "1", pages = "7:1--7:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3532612", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 16 07:36:49 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3532612", abstract = "Multi-view clustering, which aims at boosting the clustering performance by leveraging the individual information and the common information of multi-view data, has gained extensive consideration in recent years. 
However, most existing multi-view \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "7", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chakraborty:2023:SAM, author = "Roshni Chakraborty and Ritwika Das and Joydeep Chandra", title = "{SigGAN}: Adversarial Model for Learning Signed Relationships in Networks", journal = j-TKDD, volume = "17", number = "1", pages = "8:1--8:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3532610", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 16 07:36:49 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3532610", abstract = "Signed link prediction in graphs is an important problem that has applications in diverse domains. It is a binary classification problem that predicts whether an edge between a pair of nodes is positive or negative. Existing approaches for link prediction \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "8", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2023:DGC, author = "Fuxian Li and Jie Feng and Huan Yan and Guangyin Jin and Fan Yang and Funing Sun and Depeng Jin and Yong Li", title = "Dynamic Graph Convolutional Recurrent Network for Traffic Prediction: Benchmark and Solution", journal = j-TKDD, volume = "17", number = "1", pages = "9:1--9:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3532611", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 16 07:36:49 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3532611", abstract = "Traffic prediction is the cornerstone of intelligent transportation system. Accurate traffic forecasting is essential for the applications of smart cities, i.e., intelligent traffic management and urban planning. Although various methods are proposed for \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "9", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Huang:2023:URD, author = "Gengsen Huang and Wensheng Gan and Jian Weng and Philip S. Yu", title = "{US-Rule}: Discovering Utility-driven Sequential Rules", journal = j-TKDD, volume = "17", number = "1", pages = "10:1--10:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3532613", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 16 07:36:49 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3532613", abstract = "Utility-driven mining is an important task in data science and has many applications in real life. High-utility sequential pattern mining (HUSPM) is one kind of utility-driven mining. 
It aims at discovering all sequential patterns with high utility. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "10", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2023:MCR, author = "Jiapu Wang and Boyue Wang and Junbin Gao and Yongli Hu and Baocai Yin", title = "Multi-Concept Representation Learning for Knowledge Graph Completion", journal = j-TKDD, volume = "17", number = "1", pages = "11:1--11:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3533017", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 16 07:36:49 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3533017", abstract = "Knowledge Graph Completion (KGC) aims at inferring missing entities or relations by embedding them in a low-dimensional space. However, most existing KGC methods generally fail to handle the complex concepts hidden in triplets, so the learned embeddings \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "11", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Kirielle:2023:UGB, author = "Nishadi Kirielle and Peter Christen and Thilina Ranbaduge", title = "Unsupervised Graph-Based Entity Resolution for Complex Entities", journal = j-TKDD, volume = "17", number = "1", pages = "12:1--12:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3533016", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 16 07:36:49 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3533016", abstract = "Entity resolution (ER) is the process of linking records that refer to the same entity. 
Traditionally, this process compares attribute values of records to calculate similarities and then classifies pairs of records as referring to the same entity or not \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "12", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Kwon:2023:EBM, author = "Soonki Kwon and Younghoon Lee", title = "Explainability-Based Mix-Up Approach for Text Data Augmentation", journal = j-TKDD, volume = "17", number = "1", pages = "13:1--13:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3533048", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 16 07:36:49 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3533048", abstract = "Text augmentation is a strategy for increasing the diversity of training examples without explicitly collecting new data. Owing to the efficiency and effectiveness of text augmentation, numerous augmentation methodologies have been proposed. Among them, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "13", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2023:CBS, author = "Qian Li and Xiangmeng Wang and Zhichao Wang and Guandong Xu", title = "Be Causal: De-Biasing Social Network Confounding in Recommendation", journal = j-TKDD, volume = "17", number = "1", pages = "14:1--14:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3533725", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 16 07:36:49 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3533725", abstract = "In recommendation systems, the existence of the missing-not-at-random (MNAR) problem results in the selection bias issue, degrading the recommendation performance ultimately. A common practice to address MNAR is to treat missing entries from the so-called \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "14", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Luo:2023:SDH, author = "Xiao Luo and Haixin Wang and Daqing Wu and Chong Chen and Minghua Deng and Jianqiang Huang and Xian-Sheng Hua", title = "A Survey on Deep Hashing Methods", journal = j-TKDD, volume = "17", number = "1", pages = "15:1--15:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3532624", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 16 07:36:49 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/hash.bib; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3532624", abstract = "Nearest neighbor search aims at obtaining the samples in the database with the smallest distances from them to the queries, which is a basic task in a range of fields, including computer vision and data mining. Hashing is one of the most widely used methods for its computational and storage efficiency. With the development of deep learning, deep hashing methods show more advantages than traditional methods. In this survey, we detailedly investigate current deep hashing algorithms including deep supervised hashing and deep unsupervised hashing. Specifically, we categorize deep supervised hashing methods into pairwise methods, ranking-based methods, pointwise methods as well as quantization according to how measuring the similarities of the learned hash codes. Moreover, deep unsupervised hashing is categorized into similarity reconstruction-based methods, pseudo-label-based methods, and prediction-free self-supervised learning-based methods based on their semantic learning manners. We also introduce three related important topics including semi-supervised deep hashing, domain adaption deep hashing, and multi-modal deep hashing. 
Meanwhile, we present some commonly used public datasets and the scheme to measure the performance of deep hashing algorithms. Finally, we discuss some potential research directions in conclusion.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "15", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Nguyen:2023:DAG, author = "Hung T. Nguyen and Pierre J. Liang and Leman Akoglu", title = "Detecting Anomalous Graphs in Labeled Multi-Graph Databases", journal = j-TKDD, volume = "17", number = "2", pages = "16:1--16:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3533770", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Mar 31 09:53:44 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3533770", abstract = "Within a large database G containing graphs with labeled nodes and directed, multi-edges; how can we detect the anomalous graphs? Most existing work are designed for plain (unlabeled) and/or simple (unweighted) graphs. We introduce CODEtect, the first \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov.
Data", articleno = "16", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Lu:2023:SDI, author = "Xun Lu and Songhe Feng", title = "Structure Diversity-Induced Anchor Graph Fusion for Multi-View Clustering", journal = j-TKDD, volume = "17", number = "2", pages = "17:1--17:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3534931", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Mar 31 09:53:44 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3534931", abstract = "The anchor graph structure has been widely used to speed up large-scale multi-view clustering and exhibited promising performance. How to effectively integrate the anchor graphs on multiple views to achieve enhanced clustering performance still remains a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "17", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2023:GML, author = "Lichen Wang and Zhengming Ding and Kasey Lee and Seungju Han and Jae-Joon Han and Changkyu Choi and Yun Fu", title = "Generative Multi-Label Correlation Learning", journal = j-TKDD, volume = "17", number = "2", pages = "18:1--18:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3538708", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Mar 31 09:53:44 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3538708", abstract = "In real-world applications, a single instance could have more than one label. To solve this task, multi-label learning methods emerged in recent years. 
It is a more challenging problem for many reasons, such as complex label correlation, long-tail label \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "18", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Lu:2023:DPE, author = "Xun Lu and Songhe Feng and Gengyu Lyu and Yi Jin and Congyan Lang", title = "Distance-Preserving Embedding Adaptive Bipartite Graph Multi-View Learning with Application to Multi-Label Classification", journal = j-TKDD, volume = "17", number = "2", pages = "19:1--19:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3537900", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Mar 31 09:53:44 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3537900", abstract = "Graph-based multi-view learning has attracted much attention due to the efficacy of fusing the information from different views. However, most of them exhibit high computational complexity. We propose an anchor-based bipartite graph embedding approach to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "19", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2023:CUV, author = "Qianru Wang and Bin Guo and Lu Cheng and Zhiwen Yu and Huan Liu", title = "{CausalSE}: Understanding Varied Spatial Effects with Missing Data Toward Adding New Bike-sharing Stations", journal = j-TKDD, volume = "17", number = "2", pages = "20:1--20:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3536427", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Mar 31 09:53:44 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3536427", abstract = "To meet the growing bike-sharing demands and make people's travel convenient, the companies need to add new stations at locations where demands exceed supply. Before making reliable decisions on adding new stations, it is required to understand the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "20", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Jerez:2023:EAD, author = "Carlos Ivan Jerez and Jun Zhang and Marcia R. Silva", title = "On Equivalence of Anomaly Detection Algorithms", journal = j-TKDD, volume = "17", number = "2", pages = "21:1--21:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3536428", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Mar 31 09:53:44 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3536428", abstract = "In most domains, anomaly detection is typically cast as an unsupervised learning problem because of the infeasibility of labeling large datasets. 
In this setup, the evaluation and comparison of different anomaly detection algorithms is difficult. Although \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "21", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Freris:2023:IEV, author = "Nikolaos M. Freris and Ahmad Ajalloeian and Michalis Vlachos", title = "Interpretable Embedding and Visualization of Compressed Data", journal = j-TKDD, volume = "17", number = "2", pages = "22:1--22:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3537901", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Mar 31 09:53:44 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3537901", abstract = "Traditional embedding methodologies, also known as dimensionality reduction techniques, assume the availability of exact pairwise distances between the high-dimensional objects that will be embedded in a lower dimensionality. In this article, we propose \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "22", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2023:MIF, author = "Shaokang Wang and Li Pan and Yu Wu", title = "Meta-Information Fusion of Hierarchical Semantics Dependency and Graph Structure for Structured Text Classification", journal = j-TKDD, volume = "17", number = "2", pages = "23:1--23:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3537971", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Mar 31 09:53:44 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3537971", abstract = "Structured text with plentiful hierarchical structure information is an important part in real-world complex texts. Structured text classification is attracting more attention in natural language processing due to the increasing complexity of application \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "23", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhu:2023:NGI, author = "Xuliang Zhu and Xin Huang and Longxu Sun and Jiming Liu", title = "A Novel Graph Indexing Approach for Uncovering Potential {COVID-19} Transmission Clusters", journal = j-TKDD, volume = "17", number = "2", pages = "24:1--24:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3538492", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Mar 31 09:53:44 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3538492", abstract = "The COVID-19 pandemic has caused the society lockdowns and a large number of deaths in many countries. 
Potential transmission cluster discovery is to find all suspected users with infections, which is greatly needed to fast discover virus transmission \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "24", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Atyabi:2023:SCA, author = "Adham Atyabi and Frederick Shic and Jiajun Jiang and Claire E. Foster and Erin Barney and Minah Kim and Beibin Li and Pamela Ventola and Chung Hao Chen", title = "Stratification of Children with Autism Spectrum Disorder Through Fusion of Temporal Information in Eye-gaze Scan-Paths", journal = j-TKDD, volume = "17", number = "2", pages = "25:1--25:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3539226", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Mar 31 09:53:44 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3539226", abstract = "Background: Looking pattern differences are shown to separate individuals with Autism Spectrum Disorder (ASD) and Typically Developing (TD) controls. Recent studies have shown that, in children with ASD, these patterns change with intellectual and social \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "25", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chen:2023:GDF, author = "Hongjie Chen and Ryan A. 
Rossi and Kanak Mahadik and Sungchul Kim and Hoda Eldardiry", title = "Graph Deep Factors for Probabilistic Time-series Forecasting", journal = j-TKDD, volume = "17", number = "2", pages = "26:1--26:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3543511", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Mar 31 09:53:44 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3543511", abstract = "Effective time-series forecasting methods are of significant importance to solve a broad spectrum of research problems. Deep probabilistic forecasting techniques have recently been proposed for modeling large collections of time-series. However, these \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "26", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Jha:2023:SCL, author = "Akshita Jha and Vineeth Rakesh and Jaideep Chandrashekar and Adithya Samavedhi and Chandan K. Reddy", title = "Supervised Contrastive Learning for Interpretable Long-Form Document Matching", journal = j-TKDD, volume = "17", number = "2", pages = "27:1--27:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3542822", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Mar 31 09:53:44 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3542822", abstract = "Recent advancements in deep learning techniques have transformed the area of semantic text matching (STM). However, most state-of-the-art models are designed to operate with short documents such as tweets, user reviews, comments, and so on. These models \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "27", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Vajiac:2023:DIT, author = "Catalina Vajiac and Meng-Chieh Lee and Aayushi Kulshrestha and Sacha Levy and Namyong Park and Andreas Olligschlaeger and Cara Jones and Reihaneh Rabbany and Christos Faloutsos", title = "{DeltaShield}: Information Theory for Human-Trafficking Detection", journal = j-TKDD, volume = "17", number = "2", pages = "28:1--28:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3563040", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Mar 31 09:53:44 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3563040", abstract = "Given a million escort advertisements, how can we spot near-duplicates? Such micro-clusters of ads are usually signals of human trafficking (HT). How can we summarize them to convince law enforcement to act? Spotting micro-clusters of near-duplicate \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "28", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Sun:2023:SHI, author = "Jianhui Sun and Ying Yang and Guangxu Xun and Aidong Zhang", title = "Scheduling Hyperparameters to Improve Generalization: From Centralized {SGD} to Asynchronous {SGD}", journal = j-TKDD, volume = "17", number = "2", pages = "29:1--29:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3544782", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Mar 31 09:53:44 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3544782", abstract = "This article studies how to schedule hyperparameters to improve generalization of both centralized single-machine stochastic gradient descent (SGD) and distributed asynchronous SGD (ASGD). SGD augmented with momentum variants (e.g., heavy ball momentum (\ldots{}))", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "29", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Miao:2023:DPC, author = "Xiaoye Miao and Huanhuan Peng and Yunjun Gao and Zongfu Zhang and Jianwei Yin", title = "On Dynamically Pricing Crowdsourcing Tasks", journal = j-TKDD, volume = "17", number = "2", pages = "30:1--30:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3544018", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Mar 31 09:53:44 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3544018", abstract = "Crowdsourcing techniques have been extensively explored in the past decade, including task allocation, quality assessment, and so on. 
Most of professional crowdsourcing platforms adopt the fixed pricing scheme to offer a fixed price for crowd tasks. It is \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "30", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Kamhoua:2023:GGG, author = "Barakeel Fanseu Kamhoua and Lin Zhang and Kaili Ma and James Cheng and Bo Li and Bo Han", title = "{GRACE}: a General Graph Convolution Framework for Attributed Graph Clustering", journal = j-TKDD, volume = "17", number = "3", pages = "31:1--31:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3544977", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Mar 31 09:53:45 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3544977", abstract = "Attributed graph clustering (AGC) is an important problem in graph mining as more and more complex data in real-world have been represented in graphs with attributed nodes. While it is a common practice to leverage both attribute and structure information \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "31", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhai:2023:LGR, author = "Penglong Zhai and Shihua Zhang", title = "Learnable Graph-Regularization for Matrix Decomposition", journal = j-TKDD, volume = "17", number = "3", pages = "32:1--32:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3544781", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Mar 31 09:53:45 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3544781", abstract = "Low-rank approximation models of data matrices have become important machine learning and data mining tools in many fields, including computer vision, text mining, bioinformatics, and many others. They allow for embedding high-dimensional data into low-dimensional \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "32", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chen:2023:RLP, author = "Jinwei Chen and Zefang Zong and Yunlin Zhuang and Huan Yan and Depeng Jin and Yong Li", title = "Reinforcement Learning for Practical Express Systems with Mixed Deliveries and Pickups", journal = j-TKDD, volume = "17", number = "3", pages = "33:1--33:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3546952", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Mar 31 09:53:45 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3546952", abstract = "In real-world express systems, couriers need to satisfy not only the delivery demands but also the pick-up demands of customers. Delivery and pickup tasks are usually mixed together within integrated routing plans. 
Such a mixed routing problem can be \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "33", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Feng:2023:CTE, author = "Tao Feng and Sirui Song and Tong Xia and Yong Li", title = "Contact Tracing and Epidemic Intervention via Deep Reinforcement Learning", journal = j-TKDD, volume = "17", number = "3", pages = "34:1--34:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3546870", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Mar 31 09:53:45 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3546870", abstract = "The recent outbreak of COVID-19 poses a serious threat to people's lives. Epidemic control strategies have also caused damage to the economy by cutting off humans' daily commute. In this article, we develop an Individual-based Reinforcement Learning \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "34", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wan:2023:PMT, author = "Mingyang Wan and Daochen Zha and Ninghao Liu and Na Zou", title = "In-Processing Modeling Techniques for Machine Learning Fairness: a Survey", journal = j-TKDD, volume = "17", number = "3", pages = "35:1--35:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3551390", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Mar 31 09:53:45 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3551390", abstract = "Machine learning models are becoming pervasive in high-stakes applications. 
Despite their clear benefits in terms of performance, the models could show discrimination against minority groups and result in fairness issues in a decision-making process, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "35", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Long:2023:MAC, author = "Qiang Long and Adil Bagirov and Sona Taheri and Nargiz Sultanova and Xue Wu", title = "Methods and Applications of Clusterwise Linear Regression: a Survey and Comparison", journal = j-TKDD, volume = "17", number = "3", pages = "36:1--36:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3550074", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Mar 31 09:53:45 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3550074", abstract = "Clusterwise linear regression (CLR) is a well-known technique for approximating a data using more than one linear function. It is based on the combination of clustering and multiple linear regression methods. This article provides a comprehensive survey \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "36", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wu:2023:OMO, author = "Youxi Wu and Mingjie Chen and Yan Li and Jing Liu and Zhao Li and Jinyan Li and Xindong Wu", title = "{ONP-Miner}: One-off Negative Sequential Pattern Mining", journal = j-TKDD, volume = "17", number = "3", pages = "37:1--37:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3549940", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Mar 31 09:53:45 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3549940", abstract = "Negative sequential pattern mining (SPM) is an important SPM research topic. Unlike positive SPM, negative SPM can discover events that should have occurred but have not occurred, and it can be used for financial risk management and fraud detection. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "37", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Carchiolo:2023:ENP, author = "Vincenza Carchiolo and Marco Grassia and Alessandro Longheu and Michele Malgeri and Giuseppe Mangioni", title = "Efficient Node {PageRank} Improvement via Link-building using Geometric Deep Learning", journal = j-TKDD, volume = "17", number = "3", pages = "38:1--38:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3551642", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Mar 31 09:53:45 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/pagerank.bib; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3551642", abstract = "Centrality is a relevant topic in the field of network research, due to its various theoretical and practical implications. In general, all centrality metrics aim at measuring the importance of nodes (according to some definition of importance), and such importance scores are used to rank the nodes in the network, therefore the rank improvement is a strictly related topic. In a given network, the rank improvement is achieved by establishing new links, therefore the question shifts to which and how many links should be collected to get a desired rank. This problem, also known as link-building has been shown to be NP-hard, and most heuristics developed failed in obtaining good performance with acceptable computational complexity. In this article, we present LB--GDM, a novel approach that leverages Geometric Deep Learning to tackle the link-building problem. 
To validate our proposal, 31 real-world networks were considered; tests show that LB--GDM performs significantly better than the state-of-the-art heuristics, while having a comparable or even lower computational complexity, which allows it to scale well even to large networks.\ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "38", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Jiang:2023:LLM, author = "Linli Jiang and Chao-Xiong Chen and Chao Chen", title = "{L2MM}: Learning to Map Matching with Deep Models for Low-Quality {GPS} Trajectory Data", journal = j-TKDD, volume = "17", number = "3", pages = "39:1--39:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3550486", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Mar 31 09:53:45 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3550486", abstract = "Map matching is a fundamental research topic with the objective of aligning GPS trajectories to paths on the road network. However, existing models fail to achieve satisfactory performance for low-quality (i.e., noisy, low-frequency, and non-uniform) \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "39", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2023:EIS, author = "Yihong Zhang and Takahiro Hara", title = "Explainable Integration of Social Media Background in a Dynamic Neural Recommender", journal = j-TKDD, volume = "17", number = "3", pages = "40:1--40:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3550279", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Mar 31 09:53:45 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3550279", abstract = "Recommender systems nowadays are commonly deployed in e-commerce platforms to help customers making purchase decisions. Dynamic recommender considers not only static user-item interaction data, but the temporal information at the time of recommendation. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "40", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2023:MRB, author = "Yashen Wang and Zhaoyu Wang and Huanhuan Zhang and Zhirun Liu", title = "Microblog Retrieval Based on Concept-Enhanced Pre-Training Model", journal = j-TKDD, volume = "17", number = "3", pages = "41:1--41:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3552311", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Mar 31 09:53:45 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3552311", abstract = "Despite substantial interest in applications of neural networks to information retrieval, neural ranking models have mostly been applied to conventional ad-hoc retrieval tasks over web pages and newswire articles. 
This article proposes a concept-enhanced \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "41", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wei:2023:DSB, author = "Xuemei Wei and Yezheng Liu and Jianshan Sun and Yuanchun Jiang and Qifeng Tang and Kun Yuan", title = "Dual Subgraph-Based Graph Neural Network for Friendship Prediction in Location-Based Social Networks", journal = j-TKDD, volume = "17", number = "3", pages = "42:1--42:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3554981", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Mar 31 09:53:45 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3554981", abstract = "With the wide use of Location-Based Social Networks (LBSNs), predicting user friendship from online social relations and offline trajectory data is of great value to improve the platform service quality and user satisfaction. Existing methods mainly focus \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "42", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Jiang:2023:DTL, author = "Xin Jiang and Zhengxin Yu and Chao Hai and Hongbo Liu and Xindong Wu and Tomas Ward", title = "{DNformer}: Temporal Link Prediction with Transfer Learning in Dynamic Networks", journal = j-TKDD, volume = "17", number = "3", pages = "43:1--43:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3551892", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Mar 31 09:53:45 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3551892", abstract = "Temporal link prediction (TLP) is among the most important graph learning tasks, capable of predicting dynamic, time-varying links within networks. The key problem of TLP is how to explore potential link-evolving tendency from the increasing number of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "43", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Aleryani:2023:MIE, author = "Aliya Aleryani and Aaron Bostrom and Wenjia Wang and Beatriz Iglesia", title = "Multiple Imputation Ensembles for Time Series ({MIE-TS})", journal = j-TKDD, volume = "17", number = "3", pages = "44:1--44:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3551643", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Mar 31 09:53:45 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3551643", abstract = "Time series classification has become an interesting field of research, thanks to the extensive studies conducted in the past two decades. 
Time series may have missing data, which may affect both the representation and also modeling of time series. Thus, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "44", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2023:VGA, author = "Dongjie Li and Dong Li and Guang Lian", title = "Variational Graph Autoencoder with Adversarial Mutual Information Learning for Network Representation Learning", journal = j-TKDD, volume = "17", number = "3", pages = "45:1--45:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3555809", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri Mar 31 09:53:45 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3555809", abstract = "With the success of Graph Neural Network (GNN) in network data, some GNN-based representation learning methods for networks have emerged recently. Variational Graph Autoencoder (VGAE) is a basic GNN framework for network representation. Its purpose is to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "45", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wu:2023:CTIa, author = "Gongqing Wu and Liangzhu Zhou and Jiazhu Xia and Lei Li and Xianyu Bao and Xindong Wu", title = "Crowdsourcing Truth Inference Based on Label Confidence Clustering", journal = j-TKDD, volume = "17", number = "4", pages = "46:1--46:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3556545", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 8 07:29:25 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3556545", abstract = "Truth inference can help solve some difficult problems of data integration in crowdsourcing. Crowdsourced workers are not experts and their labeling ability varies greatly; therefore, in practical applications, it is difficult to determine whether the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "46", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Sehnan:2023:DSI, author = "Dhruv Sehnan and Vasu Goel and Sarah Masud and Chhavi Jain and Vikram Goyal and Tanmoy Chakraborty", title = "{DiVA}: a Scalable, Interactive and Customizable Visual Analytics Platform for Information Diffusion on Large Networks", journal = j-TKDD, volume = "17", number = "4", pages = "47:1--47:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3558771", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 8 07:29:25 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3558771", abstract = "With an increasing outreach of digital platforms in our lives, researchers have taken a keen interest in studying different facets of social interactions. Analyzing the spread of information (aka diffusion) has brought forth multiple research areas such \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "47", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{E:2023:CEC, author = "Jinlong E 
and Mo Li and Jianqiang Huang", title = "{CrowdAtlas}: Estimating Crowd Distribution within the Urban Rail Transit System", journal = j-TKDD, volume = "17", number = "4", pages = "48:1--48:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3558521", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 8 07:29:25 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3558521", abstract = "While urban rail transit systems are playing an increasingly important role in meeting the transportation demands of people, precise awareness of how the human crowd is distributed within such a system is highly necessary, which serves a range of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "48", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yang:2023:PFL, author = "Lei Yang and Jiaming Huang and Wanyu Lin and Jiannong Cao", title = "Personalized Federated Learning on {Non-IID} Data via Group-based Meta-learning", journal = j-TKDD, volume = "17", number = "4", pages = "49:1--49:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3558005", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 8 07:29:25 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3558005", abstract = "Personalized federated learning (PFL) has emerged as a paradigm to provide a personalized model that can fit the local data distribution of each client. One natural choice for PFL is to leverage the fast adaptation capability of meta-learning, where it \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "49", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Hermanns:2023:GSG, author = "Judith Hermanns and Konstantinos Skitsas and Anton Tsitsulin and Marina Munkhoeva and Alexander Kyster and Simon Nielsen and Alexander M. Bronstein and Davide Mottin and Panagiotis Karras", title = "{GRASP}: Scalable Graph Alignment by Spectral Corresponding Functions", journal = j-TKDD, volume = "17", number = "4", pages = "50:1--50:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3561058", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 8 07:29:25 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3561058", abstract = "What is the best way to match the nodes of two graphs? This graph alignment problem generalizes graph isomorphism and arises in applications from social network analysis to bioinformatics. Some solutions assume that auxiliary information on known matches \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "50", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Nakajima:2023:RWS, author = "Kazuki Nakajima and Kazuyuki Shudo", title = "Random Walk Sampling in Social Networks Involving Private Nodes", journal = j-TKDD, volume = "17", number = "4", pages = "51:1--51:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3561388", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 8 07:29:25 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3561388", abstract = "Analysis of social networks with limited data access is challenging for third parties. 
To address this challenge, a number of studies have developed algorithms that estimate properties of social networks via a simple random walk. However, most existing \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "51", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Shui:2023:LOL, author = "Changjian Shui and William Wang and Ihsen Hedhli and Chi Man Wong and Feng Wan and Boyu Wang and Christian Gagn{\'e}", title = "Lifelong Online Learning from Accumulated Knowledge", journal = j-TKDD, volume = "17", number = "4", pages = "52:1--52:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3563947", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 8 07:29:25 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3563947", abstract = "In this article, we formulate lifelong learning as an online transfer learning procedure over consecutive tasks, where learning a given task depends on the accumulated knowledge. We propose a novel theoretical principled framework, lifelong online \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov.
Data", articleno = "52", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Dai:2023:DMV, author = "Shaojie Dai and Jinshuai Wang and Chao Huang and Yanwei Yu and Junyu Dong", title = "Dynamic Multi-View Graph Neural Networks for Citywide Traffic Inference", journal = j-TKDD, volume = "17", number = "4", pages = "53:1--53:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3564754", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 8 07:29:25 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3564754", abstract = "Accurate citywide traffic inference is critical for improving intelligent transportation systems with smart city applications. However, this task is very challenging given the limited training data, due to the high cost of sensor installment and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "53", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ling:2023:STD, author = "Shuai Ling and Zhe Yu and Shaosheng Cao and Haipeng Zhang and Simon Hu", title = "{STHAN}: Transportation Demand Forecasting with Compound Spatio-Temporal Relationships", journal = j-TKDD, volume = "17", number = "4", pages = "54:1--54:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3565578", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 8 07:29:25 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3565578", abstract = "Transportation demand forecasting is a critical precondition of optimal online transportation dispatch, which will greatly reduce drivers' wasted mileage and customers' waiting time, contributing to economic and environmental sustainability. Though \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "54", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2023:MMI, author = "Jiaying Liu and Feng Xia and Jing Ren and Bo Xu and Guansong Pang and Lianhua Chi", title = "{MIRROR}: Mining Implicit Relationships via Structure-Enhanced Graph Convolutional Networks", journal = j-TKDD, volume = "17", number = "4", pages = "55:1--55:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3564531", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 8 07:29:25 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3564531", abstract = "Data explosion in the information society drives people to develop more effective ways to extract meaningful information. 
Extracting semantic information and relational information has emerged as a key mining primitive in a wide variety of practical \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "55", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2023:TTA, author = "Zhi Liu and Yang Chen and Feng Xia and Jixin Bian and Bing Zhu and Guojiang Shen and Xiangjie Kong", title = "{TAP}: Traffic Accident Profiling via Multi-Task Spatio-Temporal Graph Representation Learning", journal = j-TKDD, volume = "17", number = "4", pages = "56:1--56:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3564594", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 8 07:29:25 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3564594", abstract = "Predicting traffic accidents can help traffic management departments respond to sudden traffic situations promptly, improve drivers' vigilance, and reduce losses caused by traffic accidents. However, the causality of traffic accidents is complex and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "56", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chen:2023:TRR, author = "Lei Chen and Jie Cao and Haicheng Tao and Jia Wu", title = "Trip Reinforcement Recommendation with Graph-based Representation Learning", journal = j-TKDD, volume = "17", number = "4", pages = "57:1--57:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3564609", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 8 07:29:25 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3564609", abstract = "Tourism is an important industry and a popular leisure activity involving billions of tourists per annum. One challenging problem tourists face is identifying attractive Places-of-Interest (POIs) and planning the personalized trip with time constraints. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "57", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chen:2023:LEL, author = "Huiming Chen and Huandong Wang and Quanming Yao and Yong Li and Depeng Jin and Qiang Yang", title = "{LoSAC}: an Efficient Local Stochastic Average Control Method for Federated Optimization", journal = j-TKDD, volume = "17", number = "4", pages = "58:1--58:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3566128", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 8 07:29:25 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3566128", abstract = "Federated optimization (FedOpt), which targets at collaboratively training a learning model across a large number of distributed clients, is vital for federated learning. 
The primary concerns in FedOpt can be attributed to the model divergence and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "58", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Jing:2023:LSR, author = "Mengyuan Jing and Yanmin Zhu and Yanan Xu and Haobing Liu and Tianzi Zang and Chunyang Wang and Jiadi Yu", title = "Learning Shared Representations for Recommendation with Dynamic Heterogeneous Graph Convolutional Networks", journal = j-TKDD, volume = "17", number = "4", pages = "59:1--59:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3565575", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 8 07:29:25 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3565575", abstract = "Graph Convolutional Networks (GCNs) have been widely used for collaborative filtering, due to their effectiveness in exploiting high-order collaborative signals. However, two issues have not been well addressed by existing studies. First, usually only one \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "59", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2023:SSD, author = "Yizong Wang and Dong Zhao and Yajie Ren and Desheng Zhang and Huadong Ma", title = "{SPAP}: Simultaneous Demand Prediction and Planning for Electric Vehicle Chargers in a New City", journal = j-TKDD, volume = "17", number = "4", pages = "60:1--60:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3565577", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 8 07:29:25 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3565577", abstract = "For a new city that is committed to promoting Electric Vehicles (EVs), it is significant to plan the public charging infrastructure where charging demands are high. However, it is difficult to predict charging demands before the actual deployment of EV \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "60", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Lin:2023:ESM, author = "Dandan Lin and Victor Junqiu Wei and Raymond Chi-Wing Wong", title = "Effective and Scalable Manifold Ranking-Based Image Retrieval with Output Bound", journal = j-TKDD, volume = "17", number = "5", pages = "61:1--61:??", month = jun, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3565574", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 8 07:47:58 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3565574", abstract = "Image retrieval keeps attracting a lot of attention from both academic and industry over past years due to its variety of useful applications. 
Due to the rapid growth of deep learning approaches, more better feature vectors of images could be discovered \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "61", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhou:2023:SPA, author = "Peng Zhou and Xinwang Liu and Liang Du and Xuejun Li", title = "Self-paced Adaptive Bipartite Graph Learning for Consensus Clustering", journal = j-TKDD, volume = "17", number = "5", pages = "62:1--62:??", month = jun, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3564701", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 8 07:47:58 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3564701", abstract = "Consensus clustering provides an elegant framework to aggregate multiple weak clustering results to learn a consensus one that is more robust and stable than a single result. However, most of the existing methods usually use all data for consensus \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "62", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2023:HTN, author = "Mengran Li and Yong Zhang and Xiaoyong Li and Yuchen Zhang and Baocai Yin", title = "Hypergraph Transformer Neural Networks", journal = j-TKDD, volume = "17", number = "5", pages = "63:1--63:??", month = jun, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3565028", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 8 07:47:58 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3565028", abstract = "Graph neural networks (GNNs) have been widely used for graph structure learning and achieved excellent performance in tasks such as node classification and link prediction. Real-world graph networks imply complex and various semantic information and are \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "63", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2023:TFF, author = "Haoran Li and Zhiqiang Lv and Jianbo Li and Zhihao Xu and Yue Wang and Haokai Sun and Zhaoyu Sheng", title = "Traffic Flow Forecasting in the {COVID-19}: a Deep Spatial-temporal Model Based on Discrete Wavelet Transformation", journal = j-TKDD, volume = "17", number = "5", pages = "64:1--64:??", month = jun, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3564753", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 8 07:47:58 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3564753", abstract = "Traffic flow prediction has always been the focus of research in the field of Intelligent Transportation Systems, which is conducive to the more reasonable allocation of basic transportation resources and formulation of transportation policies. The spread \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "64", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wu:2023:CTIb, author = "Gongqing Wu and Xingrui Zhuo and Xianyu Bao and Xuegang Hu and Richang Hong and Xindong Wu", title = "Crowdsourcing Truth Inference via Reliability-Driven Multi-View Graph Embedding", journal = j-TKDD, volume = "17", number = "5", pages = "65:1--65:??", month = jun, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3565576", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 8 07:47:58 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3565576", abstract = "Crowdsourcing truth inference aims to assign a correct answer to each task from candidate answers that are provided by crowdsourced workers. A common approach is to generate workers' reliabilities to represent the quality of answers. Although crowdsourced \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "65", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Jang:2023:SST, author = "Jun-Gi Jang and U. Kang", title = "Static and Streaming {Tucker} Decomposition for Dense Tensors", journal = j-TKDD, volume = "17", number = "5", pages = "66:1--66:??", month = jun, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3568682", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 8 07:47:58 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3568682", abstract = "Given a dense tensor, how can we efficiently discover hidden relations and patterns in static and online streaming settings? Tucker decomposition is a fundamental tool to analyze multidimensional arrays in the form of tensors. 
However, existing Tucker \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "66", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2023:ULH, author = "Meng Wang and Boyu Li and Kun He and John Hopcroft", title = "Uncovering the Local Hidden Community Structure in Social Networks", journal = j-TKDD, volume = "17", number = "5", pages = "67:1--67:??", month = jun, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3567597", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 8 07:47:58 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3567597", abstract = "Hidden community is a useful concept proposed recently for social network analysis. Hidden communities indicate some weak communities whose most members also belong to other stronger dominant communities. Dominant communities could form a layer that \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "67", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2023:CFU, author = "Hao Liu and Qingyu Guo and Hengshu Zhu and Yanjie Fu and Fuzhen Zhuang and Xiaojuan Ma and Hui Xiong", title = "Characterizing and Forecasting Urban Vibrancy Evolution: a Multi-View Graph Mining Perspective", journal = j-TKDD, volume = "17", number = "5", pages = "68:1--68:??", month = jun, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3568683", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 8 07:47:58 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3568683", abstract = "Urban vibrancy describes the prosperity, diversity, and accessibility of urban areas, which is vital to a city's socio-economic development and sustainability. While many efforts have been made for statically measuring and evaluating urban vibrancy, there \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "68", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ren:2023:AMA, author = "Yuyang Ren and Haonan Zhang and Peng Yu and Luoyi Fu and Xinde Cao and Xinbing Wang and Guihai Chen and Fei Long and Chenghu Zhou", title = "{Ada-MIP}: Adaptive Self-supervised Graph Representation Learning via Mutual Information and Proximity Optimization", journal = j-TKDD, volume = "17", number = "5", pages = "69:1--69:??", month = jun, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3568165", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 8 07:47:58 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3568165", abstract = "Self-supervised graph-level representation learning has recently received considerable attention. Given varied input distributions, jointly learning graphs' unique and common features is vital to downstream tasks. Inspired by graph contrastive learning \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "69", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Gu:2023:OJU, author = "Zhibin Gu and Songhe Feng and Ruiting Hu and Gengyu Lyu", title = "{ONION}: Joint Unsupervised Feature Selection and Robust Subspace Extraction for Graph-based Multi-View Clustering", journal = j-TKDD, volume = "17", number = "5", pages = "70:1--70:??", month = jun, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3568684", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 8 07:47:58 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3568684", abstract = "Graph-based Multi-View Clustering (GMVC) has received extensive attention due to its ability to capture the neighborhood relationship among data points from diverse views. However, most existing approaches construct similarity graphs from the original \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "70", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2023:SGU, author = "Zhijie Zhang and Wenzhong Li and Wangxiang Ding and Linming Zhang and Qingning Lu and Peng Hu and Tong Gui and Sanglu Lu", title = "{STAD-GAN}: Unsupervised Anomaly Detection on Multivariate Time Series with Self-training Generative Adversarial Networks", journal = j-TKDD, volume = "17", number = "5", pages = "71:1--71:??", month = jun, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3572780", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 8 07:47:58 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3572780", abstract = "Anomaly detection on multivariate time series (MTS) is an important research topic in data mining, which has a wide range of applications in information technology, financial management, manufacturing system, and so on. However, the state-of-the-art \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "71", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wu:2023:WEC, author = "Hongxin Wu and Meng Han and Zhiqiang Chen and Muhang Li and Xilong Zhang", title = "A Weighted Ensemble Classification Algorithm Based on Nearest Neighbors for Multi-Label Data Stream", journal = j-TKDD, volume = "17", number = "5", pages = "72:1--72:??", month = jun, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3570960", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 8 07:47:58 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3570960", abstract = "With the rapid development of data stream, multi-label algorithms for mining dynamic data become more and more important. At the same time, when data distribution changes, concept drift will occur, which will make the existing classification models lose \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "72", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2023:ASA, author = "Chunnan Wang and Kaixin Zhang and Hongzhi Wang and Bozhou Chen", title = "{Auto-STGCN}: Autonomous Spatial-Temporal Graph Convolutional Network Search", journal = j-TKDD, volume = "17", number = "5", pages = "73:1--73:??", month = jun, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3571285", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 8 07:47:58 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3571285", abstract = "In recent years, many spatial-temporal graph convolutional network (STGCN) models are proposed to deal with the spatial-temporal network data forecasting problem. 
These STGCN models have their own advantages, i.e., each of them puts forward many effective \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "73", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chen:2023:SSS, author = "Yufu Chen and Yanghui Rao and Shurui Chen and Zhiqi Lei and Haoran Xie and Raymond Y. K. Lau and Jian Yin", title = "Semi-Supervised Sentiment Classification and Emotion Distribution Learning Across Domains", journal = j-TKDD, volume = "17", number = "5", pages = "74:1--74:??", month = jun, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3571736", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 8 07:47:58 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3571736", abstract = "In this study, sentiment classification and emotion distribution learning across domains are both formulated as a semi-supervised domain adaptation problem, which utilizes a small amount of labeled documents in the target domain for model training. By \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "74", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Tang:2023:DSB, author = "Hui Tang and Xun Liang and Yuhui Guo and Xiangping Zheng and Bo Wu and Sensen Zhang and Zhiying Li", title = "Diffuse and Smooth: Beyond Truncated Receptive Field for Scalable and Adaptive Graph Representation Learning", journal = j-TKDD, volume = "17", number = "5", pages = "75:1--75:??", month = jun, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3572781", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 8 07:47:58 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3572781", abstract = "As the scope of receptive field and the depth of Graph Neural Networks (GNNs) are two completely orthogonal aspects for graph learning, existing GNNs often have shallow layers with truncated-receptive field and far from achieving satisfactory performance. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "75", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2023:PPP, author = "Xiao Liu and Bonan Gao and Basem Suleiman and Han You and Zisu Ma and Yu Liu and Ali Anaissi", title = "Privacy-Preserving Personalized Fitness Recommender System {P$^3$FitRec}: a Multi-level Deep Learning Approach", journal = j-TKDD, volume = "17", number = "6", pages = "76:1--76:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3572899", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Apr 17 11:51:51 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3572899", abstract = "Recommender systems have been successfully used in many domains with the help of machine learning algorithms. However, such applications tend to use multi-dimensional user data, which has raised widespread concerns about the breach of users' privacy. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "76", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yang:2023:TCS, author = "Jie Yang and Zhixiao Wang and Xiaobin Rui and Yahui Chai and Philip S. Yu and Lichao Sun", title = "Triadic Closure Sensitive Influence Maximization", journal = j-TKDD, volume = "17", number = "6", pages = "77:1--77:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3573011", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Apr 17 11:51:51 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3573011", abstract = "The influence
maximization problem aims at selecting the k most influential nodes (i.e., seed nodes) from a social network, where the nodes can maximize the number of influenced nodes \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "77", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Cousins:2023:BBC, author = "Cyrus Cousins and Chloe Wohlgemuth and Matteo Riondato", title = "{Bavarian}: Betweenness Centrality Approximation with Variance-aware {Rademacher} Averages", journal = j-TKDD, volume = "17", number = "6", pages = "78:1--78:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3577021", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Apr 17 11:51:51 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3577021", abstract = "``[A]llain Gersten, Hopfen, und Wasser'' --- 1516 Reinheitsgebot We present Bavarian, a collection of sampling-based algorithms for approximating the Betweenness Centrality (BC) of all vertices in a graph. Our algorithms use Monte-Carlo Empirical Rademacher \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "78", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Jalali:2023:FIF, author = "Zeinab S. Jalali and Qilan Chen and Shwetha M. 
Srikanta and Weixiang Wang and Myunghwan Kim and Hema Raghavan and Sucheta Soundarajan", title = "Fairness of Information Flow in Social Networks", journal = j-TKDD, volume = "17", number = "6", pages = "79:1--79:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3578268", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Apr 17 11:51:51 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3578268", abstract = "Social networks form a major parts of people's lives, and individuals often make important life decisions based on information that spreads through these networks. For this reason, it is important to know whether individuals from different protected \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "79", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2023:EEG, author = "Yunyi Li and Yongjing Hao and Pengpeng Zhao and Guanfeng Liu and Yanchi Liu and Victor S. Sheng and Xiaofang Zhou", title = "Edge-enhanced Global Disentangled Graph Neural Network for Sequential Recommendation", journal = j-TKDD, volume = "17", number = "6", pages = "80:1--80:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3577928", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Apr 17 11:51:51 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3577928", abstract = "Sequential recommendation has been a widely popular topic of recommender systems. Existing works have contributed to enhancing the prediction ability of sequential recommendation systems based on various methods, such as recurrent networks and self- \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov.
Data", articleno = "80", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Feng:2023:HDP, author = "Wenjie Feng and Shenghua Liu and Xueqi Cheng", title = "Hierarchical Dense Pattern Detection in Tensors", journal = j-TKDD, volume = "17", number = "6", pages = "81:1--81:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3577022", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Apr 17 11:51:51 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3577022", abstract = "Dense subtensor detection gains remarkable success in spotting anomalies and fraudulent behaviors for multi-aspect data (i.e., tensors), like in social media and event streams. Existing methods detect the densest subtensors flatly and separately, with the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "81", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Huang:2023:GCD, author = "Jingmin Huang and Bowei Chen and Zhi Yan and Iadh Ounis and Jun Wang", title = "{GEO}: a Computational Design Framework for Automotive Exterior Facelift", journal = j-TKDD, volume = "17", number = "6", pages = "82:1--82:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3578521", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Apr 17 11:51:51 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3578521", abstract = "Exterior facelift has become an effective method for automakers to boost the consumers' interest in an existing car model before it is redesigned. 
To support the automotive facelift design process, this study develops a novel computational framework --- \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "82", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Singh:2023:MSM, author = "Karandeep Singh and Seungeon Lee and Giuseppe (Joe) Labianca and Jesse Michael Fagan and Meeyoung Cha", title = "Multi-Stage Machine Learning Model for Hierarchical Tie Valence Prediction", journal = j-TKDD, volume = "17", number = "6", pages = "83:1--83:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3579096", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Apr 17 11:51:51 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3579096", abstract = "Individuals interacting in organizational settings involving varying levels of formal hierarchy naturally form a complex network of social ties having different tie valences (e.g., positive and negative connections). Social ties critically affect \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "83", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Su:2023:NMF, author = "Sixing Su and Jiewen Guan and Bilian Chen and Xin Huang", title = "Nonnegative Matrix Factorization Based on Node Centrality for Community Detection", journal = j-TKDD, volume = "17", number = "6", pages = "84:1--84:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3578520", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Apr 17 11:51:51 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3578520", abstract = "Community detection is an important topic in network analysis, and recently many community detection methods have been developed on top of the Nonnegative Matrix Factorization (NMF) technique. Most NMF-based community detection methods only utilize the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "84", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2023:EEA, author = "Yuxiang Wang and Jun Liu and Xiaoliang Xu and Xiangyu Ke and Tianxing Wu and Xiaoxuan Gou", title = "Efficient and Effective Academic Expert Finding on Heterogeneous Graphs through {$ (k, P) $}-Core based Embedding", journal = j-TKDD, volume = "17", number = "6", pages = "85:1--85:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3578365", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Apr 17 11:51:51 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3578365", abstract = "Expert finding is crucial for a wealth of applications in both academia and industry. 
Given a user query and trove of academic papers, expert finding aims at retrieving the most relevant experts for the query, from the academic papers. Existing studies \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "85", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2023:SUI, author = "Yongjie Wang and Ke Wang and Cheng Long and Chunyan Miao", title = "Summarizing User-item Matrix By Group Utility Maximization", journal = j-TKDD, volume = "17", number = "6", pages = "86:1--86:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3578586", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Apr 17 11:51:51 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3578586", abstract = "A user-item utility matrix represents the utility (or preference) associated with each (user, item) pair, such as citation counts, rating/vote on items or locations, and clicks on items. A high utility value indicates a strong association of the pair. In \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "86", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ni:2023:MBP, author = "Peikun Ni and Jianming Zhu and Guoqing Wang", title = "Misinformation Blocking Problem in Virtual and Real Interconversion Social Networks", journal = j-TKDD, volume = "17", number = "6", pages = "87:1--87:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3578936", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Apr 17 11:51:51 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3578936", abstract = "With the in-depth development of intelligent media technology, online and offline fusion, reality and virtual entanglement, information content generalization, the boundary between positive and negative information is blurred, all kinds of misinformation \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "87", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ren:2023:SFB, author = "Jinjun Ren and Yuping Wang and Xiyan Deng", title = "Slack-Factor-Based Fuzzy Support Vector Machine for Class Imbalance Problems", journal = j-TKDD, volume = "17", number = "6", pages = "88:1--88:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3579050", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Apr 17 11:51:51 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3579050", abstract = "Class imbalance and noisy data widely exist in real-world problems, and the support vector machine (SVM) is hard to construct good classifiers on these data. 
Fuzzy SVMs (FSVMs), as variants of SVM, use a fuzzy membership function both to reflect the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "88", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2023:TWP, author = "Lei Li and Zhiyuan Liu and Zan Zhang and Huanhuan Chen and Xindong Wu", title = "Three-way Preference Completion via Preference Graph", journal = j-TKDD, volume = "17", number = "6", pages = "89:1--89:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3580368", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Apr 17 11:51:51 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3580368", abstract = "With the personal partial rankings from agents over a subset of alternatives, the goal of preference completion is to infer the agent's personalized preference over all alternatives including those the agent has not yet handled from uncertain preference \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "89", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Khokhar:2023:DPR, author = "Rashid Hussain Khokhar and Benjamin C. M. 
Fung and Farkhund Iqbal and Khalil Al-Hussaeni and Mohammed Hussain", title = "Differentially Private Release of Heterogeneous Network for Managing Healthcare Data", journal = j-TKDD, volume = "17", number = "6", pages = "90:1--90:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3580367", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Apr 17 11:51:51 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3580367", abstract = "With the increasing adoption of digital health platforms through mobile apps and online services, people have greater flexibility connecting with medical practitioners, pharmacists, and laboratories and accessing resources to manage their own health- \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "90", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2023:RCM, author = "Mimi Zhang and Andrew Parnell", title = "Review of Clustering Methods for Functional Data", journal = j-TKDD, volume = "17", number = "7", pages = "91:1--91:??", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3581789", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 3 07:15:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3581789", abstract = "Functional data clustering is to identify heterogeneous morphological patterns in the continuous functions underlying the discrete measurements/observations. Application of functional data clustering has appeared in many publications across various fields \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov.
Data", articleno = "91", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chen:2023:MLV, author = "Ling Chen and Dandan Lyu and Shanshan Yu and Gencai Chen", title = "Multi-Level Visual Similarity Based Personalized Tourist Attraction Recommendation Using Geo-Tagged Photos", journal = j-TKDD, volume = "17", number = "7", pages = "92:1--92:??", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3582015", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 3 07:15:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3582015", abstract = "Geo-tagged photo-based tourist attraction recommendation can discover users' travel preferences from their taken photos, so as to recommend suitable tourist attractions to them. However, existing visual content-based methods cannot fully exploit the user \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "92", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Xu:2023:OSF, author = "Wanyue Xu and Zhongzhi Zhang", title = "Optimal Scale-Free Small-World Graphs with Minimum Scaling of Cover Time", journal = j-TKDD, volume = "17", number = "7", pages = "93:1--93:??", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3583691", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 3 07:15:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3583691", abstract = "The cover time of random walks on a graph has found wide practical applications in different fields of computer science, such as crawling and searching on the World Wide Web and query processing in sensor networks, with the application effects dependent \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "93", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Sheth:2023:CDI, author = "Paras Sheth and Ruocheng Guo and Lu Cheng and Huan Liu and Kasim Sel{\c{c}}uk Candan", title = "Causal Disentanglement for Implicit Recommendations with Network Information", journal = j-TKDD, volume = "17", number = "7", pages = "94:1--94:??", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3582435", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 3 07:15:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3582435", abstract = "Online user engagement is highly influenced by various machine learning models, such as recommender systems. These systems recommend new items to the user based on the user's historical interactions. 
Implicit recommender systems reflect a binary setting \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "94", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2023:ESM, author = "Yihong Zhang and Xiu Susie Fang and Takahiro Hara", title = "Evolving Social Media Background Representation with Frequency Weights and Co-Occurrence Graphs", journal = j-TKDD, volume = "17", number = "7", pages = "95:1--95:??", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3585389", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 3 07:15:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3585389", abstract = "Social media as a background information source has been utilized in many practical computational tasks, such as stock price prediction, epidemic tracking, and product recommendation. However, proper representation of an evolving social media background \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "95", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2023:NWV, author = "Huiru Li and Liangxiao Jiang and Siqing Xue", title = "Neighborhood Weighted Voting-Based Noise Correction for Crowdsourcing", journal = j-TKDD, volume = "17", number = "7", pages = "96:1--96:??", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3586998", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 3 07:15:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3586998", abstract = "In crowdsourcing scenarios, we can obtain each instance's multiple noisy labels set from different crowd workers and then use a ground truth inference algorithm to infer its integrated label. Despite the effectiveness of ground truth inference algorithms, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "96", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2023:DNE, author = "He Li and Duo Jin and Xuejiao Li and Jianbin Huang and Xiaoke Ma and Jiangtao Cui and Deshuang Huang and Shaojie Qiao and Jaesoo Yoo", title = "{DMGF-Net}: an Efficient Dynamic Multi-Graph Fusion Network for Traffic Prediction", journal = j-TKDD, volume = "17", number = "7", pages = "97:1--97:??", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3586164", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 3 07:15:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3586164", abstract = "Traffic prediction is the core task of intelligent transportation system (ITS) and accurate traffic prediction can greatly improve the utilization of public resources. 
Dynamic interaction of multiple spatial relationships will influence the accuracy of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "97", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhao:2023:CDC, author = "Boxiang Zhao and Shuliang Wang and Lianhua Chi and Qi Li and Xiaojia Liu and Jing Geng", title = "Causal Discovery via Causal Star Graphs", journal = j-TKDD, volume = "17", number = "7", pages = "98:1--98:??", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3586997", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 3 07:15:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3586997", abstract = "Discovering causal relationships among observed variables is an important research focus in data mining. Existing causal discovery approaches are mainly based on constraint-based methods and functional causal models (FCMs). However, the constraint-based \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "98", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2023:GDL, author = "Dexian Wang and Tianrui Li and Ping Deng and Fan Zhang and Wei Huang and Pengfei Zhang and Jia Liu", title = "A Generalized Deep Learning Clustering Algorithm Based on Non-Negative Matrix Factorization", journal = j-TKDD, volume = "17", number = "7", pages = "99:1--99:??", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3584862", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 3 07:15:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3584862", abstract = "Clustering is a popular research topic in the field of data mining, in which the clustering method based on non-negative matrix factorization (NMF) has been widely employed. However, in the update process of NMF, there is no learning rate to guide the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "99", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Biswas:2023:RIM, author = "Tarun Kumer Biswas and Alireza Abbasi and Ripon Kumar Chakrabortty", title = "Robust Influence Maximization Under Both Aleatory and Epistemic Uncertainty", journal = j-TKDD, volume = "17", number = "7", pages = "100:1--100:??", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3587100", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 3 07:15:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3587100", abstract = "Uncertainty is ubiquitous in almost every real-life optimization problem, which must be effectively managed to get a robust outcome. 
This is also true for the Influence Maximization (IM) problem, which entails locating a set of influential users within a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "100", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Guo:2023:DAD, author = "Yuhui Guo and Xun Liang and Bo Wu and Xiangping Zheng and Xuan Zhang", title = "Dual-aware Domain Mining and Cross-aware Supervision for Weakly-supervised Semantic Segmentation", journal = j-TKDD, volume = "17", number = "7", pages = "101:1--101:??", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3589343", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 3 07:15:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3589343", abstract = "Weakly Supervised Semantic Segmentation with image-level annotation uses localization maps from the classifier to generate pseudo labels. However, such localization maps focus only on sparse salient object regions, it is difficult to generate high-quality \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "101", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Cheng:2023:FIU, author = "Jiezhu Cheng and Kaizhu Huang and Zibin Zheng", title = "Fitting Imbalanced Uncertainties in Multi-output Time Series Forecasting", journal = j-TKDD, volume = "17", number = "7", pages = "102:1--102:??", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3584704", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 3 07:15:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3584704", abstract = "We focus on multi-step ahead time series forecasting with the multi-output strategy. From the perspective of multi-task learning (MTL), we recognize imbalanced uncertainties between prediction tasks of different future time steps. Unexpectedly, trained by \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "102", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2023:MVE, author = "Xuanqi Zhang and Qiangqiang Shen and Yongyong Chen and Guokai Zhang and Zhongyun Hua and Jingyong Su", title = "Multi-view Ensemble Clustering via Low-rank and Sparse Decomposition: From Matrix to Tensor", journal = j-TKDD, volume = "17", number = "7", pages = "103:1--103:??", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3589768", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 3 07:15:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3589768", abstract = "As a significant extension of classical clustering methods, ensemble clustering first generates multiple basic clusterings and then fuses them into one consensus partition by solving a problem concerning graph partition with respect to the co-association \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "103", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2023:DDQ, author = "Sensen Zhang and Xun Liang and Hui Tang and Xiangping Zheng and Alex X. 
Zhang and Yuefeng Ma", title = "{DuCape}: Dual Quaternion and Capsule Network-Based Temporal Knowledge Graph Embedding", journal = j-TKDD, volume = "17", number = "7", pages = "104:1--104:??", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3589644", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 3 07:15:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3589644", abstract = "Recently, with the development of temporal knowledge graph technology, more and more Temporal Knowledge Graph Embedded (TKGE) models have been developed. The effectiveness of TKGE largely depends on the ability to model intrinsic relation patterns and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "104", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2023:TID, author = "Hao Wang and Bin Guo and Jiaqi Liu and Yasan Ding and Zhiwen Yu", title = "Towards Informative and Diverse Dialogue Systems Over Hierarchical Crowd Intelligence Knowledge Graph", journal = j-TKDD, volume = "17", number = "7", pages = "105:1--105:??", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3583758", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 3 07:15:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3583758", abstract = "Knowledge-enhanced dialogue systems aim at generating factually correct and coherent responses by reasoning over knowledge sources, which is a promising research trend. The truly harmonious human-agent dialogue systems need to conduct engaging \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "105", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ragab:2023:ABS, author = "Mohamed Ragab and Emadeldeen Eldele and Wee Ling Tan and Chuan-Sheng Foo and Zhenghua Chen and Min Wu and Chee-Keong Kwoh and Xiaoli Li", title = "{ADATIME}: a Benchmarking Suite for Domain Adaptation on Time Series Data", journal = j-TKDD, volume = "17", number = "8", pages = "106:1--106:??", month = sep, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3587937", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 3 07:15:57 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3587937", abstract = "Unsupervised domain adaptation methods aim at generalizing well on unlabeled test data that may have a different (shifted) distribution from the training data. Such methods are typically developed on image data, and their application to time series data \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "106", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Halstead:2023:CDM, author = "Ben Halstead and Yun Sing Koh and Patricia Riddle and Mykola Pechenizkiy and Albert Bifet", title = "Combining Diverse Meta-Features to Accurately Identify Recurring Concept Drift in Data Streams", journal = j-TKDD, volume = "17", number = "8", pages = "107:1--107:??", month = sep, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3587098", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 3 07:15:57 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3587098", abstract = "Learning from streaming data is challenging as the distribution of incoming data may change over time, a phenomenon known as concept drift. The predictive patterns, or experience learned under one distribution may become irrelevant as conditions change \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "107", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Shi:2023:KFD, author = "Linrui Shi and Zheng Zhang and Zizhu Fan and Chao Xi and Zhengming Li and Gaochang Wu", title = "Kernel {Fisher} Dictionary Transfer Learning", journal = j-TKDD, volume = "17", number = "8", pages = "108:1--108:??", month = sep, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3588575", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 3 07:15:57 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3588575", abstract = "Dictionary learning is an efficient knowledge representation method that can learn the essential features of data.
Traditional dictionary learning methods are difficult to obtain nonlinear information when processing large-scale and high-dimensional \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "108", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Sun:2023:GNN, author = "Heli Sun and Miaomiao Sun and Xuechun Liu and Linlin Zhu and Liang He and Xiaolin Jia and Yuan Chen", title = "Graph Neural Networks with Motisf-aware for Tenuous Subgraph Finding", journal = j-TKDD, volume = "17", number = "8", pages = "109:1--109:??", month = sep, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3589643", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 3 07:15:57 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3589643", abstract = "Tenuous subgraph finding aims to detect a subgraph with few social interactions and weak relationships among nodes. Despite significant efforts made on this task, they are mostly carried out in view of graph-structured data. These methods depend on \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "109", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wu:2023:LES, author = "Likang Wu and Hongke Zhao and Zhi Li and Zhenya Huang and Qi Liu and Enhong Chen", title = "Learning the Explainable Semantic Relations via Unified Graph Topic-Disentangled Neural Networks", journal = j-TKDD, volume = "17", number = "8", pages = "110:1--110:??", month = sep, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3589964", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 3 07:15:57 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3589964", abstract = "Graph Neural Networks (GNNs) such as Graph Convolutional Networks (GCNs) can effectively learn node representations via aggregating neighbors based on the relation graph. However, despite a few exceptions, most of the previous work in this line does not \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "110", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Jia:2023:SSB, author = "Bohan Jia and Jian Cao and Shiyou Qian and Nengjun Zhu and Xin Dong and Liang Zhang and Lei Cheng and Linjian Mo", title = "{SMONE}: a Session-based Recommendation Model Based on Neighbor Sessions with Similar Probabilistic Intentions", journal = j-TKDD, volume = "17", number = "8", pages = "111:1--111:??", month = sep, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3587099", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 3 07:15:57 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3587099", abstract = "A session-based recommendation system (SRS) tries to predict the next possible choice of anonymous users. In recent years, graph neural network (GNN) models have been successfully applied to SRSs and have achieved great success. Using GNN models in SRSs, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "111", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Hassan:2023:CGD, author = "Zohair Raza Hassan and Sarwan Ali and Imdadullah Khan and Mudassir Shabbir and Waseem Abbas", title = "Computing Graph Descriptors on Edge Streams", journal = j-TKDD, volume = "17", number = "8", pages = "112:1--112:??", month = sep, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3591468", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 3 07:15:57 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3591468", abstract = "Feature extraction is an essential task in graph analytics. 
These feature vectors, called graph descriptors, are used in downstream vector-space-based graph analysis models. This idea has proved fruitful in the past, with spectral-based graph descriptors \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "112", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Canfora:2023:NCT, author = "Gerardo Canfora and Francesco Mercaldo and Antonella Santone", title = "A Novel Classification Technique based on Formal Methods", journal = j-TKDD, volume = "17", number = "8", pages = "113:1--113:??", month = sep, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3592796", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 3 07:15:57 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3592796", abstract = "In last years, we are witnessing a growing interest in the application of supervised machine learning techniques in the most disparate fields. One winning factor of machine learning is represented by its ability to easily create models, as it does not \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "113", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Lin:2023:MVG, author = "Bei Lin and You Li and Ning Gui and Zhuopeng Xu and Zhiwu Yu", title = "Multi-view Graph Representation Learning Beyond Homophily", journal = j-TKDD, volume = "17", number = "8", pages = "114:1--114:??", month = sep, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3592858", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 3 07:15:57 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3592858", abstract = "Unsupervised graph representation learning (GRL) aims at distilling diverse graph information into task-agnostic embeddings without label supervision. Due to a lack of support from labels, recent representation learning methods usually adopt self-\ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "114", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Tajeuna:2023:MRS, author = "Etienne Gael Tajeuna and Mohamed Bouguessa and Shengrui Wang", title = "Modeling Regime Shifts in Multiple Time Series", journal = j-TKDD, volume = "17", number = "8", pages = "115:1--115:??", month = sep, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3592857", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 3 07:15:57 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3592857", abstract = "We investigate the problem of discovering and modeling regime shifts in an ecosystem comprising multiple time series known as co-evolving time series. Regime shifts refer to the changing behaviors exhibited by series at different time intervals.
Learning \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "115", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Shi:2023:ACS, author = "Dan Shi and Lei Zhu and Xiao Dong and Xuemeng Song and Jingjing Li and Zhiyong Cheng", title = "Adaptive Collaborative Soft Label Learning for Unsupervised Multi-View Feature Selection", journal = j-TKDD, volume = "17", number = "8", pages = "116:1--116:??", month = sep, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3591467", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 3 07:15:57 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3591467", abstract = "Unsupervised multi-view feature selection aims to select informative features with multi-view features and unsupervised learning. It is a challenging problem due to the absence of explicit semantic supervision. Recently, graph theory and hard pseudo-label \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "116", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2023:CIT, author = "Hao Zhang and Yewei Xia and Kun Zhang and Shuigeng Zhou and Jihong Guan", title = "Conditional Independence Test Based on Residual Similarity", journal = j-TKDD, volume = "17", number = "8", pages = "117:1--117:??", month = sep, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3593810", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 3 07:15:57 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3593810", abstract = "Recently, many regression-based conditional independence (CI) test methods have been proposed to solve the problem of causal discovery. These methods provide alternatives to test CI of x,y given Z by first removing the information of the controlling set Z \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "117", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yuan:2023:IVD, author = "Junkun Yuan and Xu Ma and Ruoxuan Xiong and Mingming Gong and Xiangyu Liu and Fei Wu and Lanfen Lin and Kun Kuang", title = "Instrumental Variable-Driven Domain Generalization with Unobserved Confounders", journal = j-TKDD, volume = "17", number = "8", pages = "118:1--118:??", month = sep, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3595380", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 3 07:15:57 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3595380", abstract = "Domain generalization (DG) aims to learn from multiple source domains a model that can generalize well on unseen target domains. 
Existing DG methods mainly learn the representations with invariant marginal distribution of the input features, however, the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "118", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Qin:2023:CBI, author = "Xi Qin and Cheng Zhong and Hai Xiang Lin", title = "Community-Based Influence Maximization Using Network Embedding in Dynamic Heterogeneous Social Networks", journal = j-TKDD, volume = "17", number = "8", pages = "119:1--119:??", month = sep, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3594544", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 3 07:15:57 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3594544", abstract = "Influence maximization (IM) is a very important issue in social network diffusion analysis. The topology of real social network is large-scale, dynamic, and heterogeneous. The heterogeneity, and continuous expansion and evolution of social network pose a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "119", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhuang:2023:CLB, author = "Jiabo Zhuang and Shunmei Meng and Jing Zhang and Victor S. 
Sheng", title = "Contrastive Learning Based Graph Convolution Network for Social Recommendation", journal = j-TKDD, volume = "17", number = "8", pages = "120:1--120:??", month = sep, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3587268", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 3 07:15:57 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3587268", abstract = "Exploiting social networks is expected to enhance the performance of recommender systems when interaction information is sparse. Existing social recommendation models focus on modeling multi-graph structures and then aggregating the information from these \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "120", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2023:RNR, author = "Liang Zhang and Cheng Long", title = "Road Network Representation Learning: a Dual Graph-based Approach", journal = j-TKDD, volume = "17", number = "9", pages = "121:1--121:??", month = nov, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3592859", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Aug 19 07:15:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3592859", abstract = "Road network is a critical infrastructure powering many applications including transportation, mobility and logistics in real life. To leverage the input of a road network across these different applications, it is necessary to learn the representations \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "121", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Syed:2023:SST, author = "Tahir Syed and Behroz Mirza", title = "Self-supervision for Tabular Data by Learning to Predict Additive Homoskedastic {Gaussian} Noise as Pretext", journal = j-TKDD, volume = "17", number = "9", pages = "122:1--122:??", month = nov, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3594720", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Aug 19 07:15:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3594720", abstract = "The lack of scalability of data annotation translates to the need to decrease dependency on labels. Self-supervision offers a solution with data training themselves. However, it has received relatively less attention on tabular data, data that drive a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "122", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2023:HCT, author = "Xiaona Li and Zhu Wang and Xindong Chen and Bin Guo and Zhiwen Yu", title = "A Hybrid Continuous-Time Dynamic Graph Representation Learning Model by Exploring Both Temporal and Repetitive Information", journal = j-TKDD, volume = "17", number = "9", pages = "123:1--123:??", month = nov, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3596447", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Aug 19 07:15:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3596447", abstract = "Recently, dynamic graph representation learning has attracted more and more attention from both academic and industrial communities due to its capabilities of capturing different real-world phenomena. For a dynamic graph represented as a sequence of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "123", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ou:2023:STS, author = "Junjie Ou and Haiming Jin and Xiaocheng Wang and Hao Jiang and Xinbing Wang and Chenghu Zhou", title = "{STA-TCN}: Spatial-temporal Attention over Temporal Convolutional Network for Next Point-of-interest Recommendation", journal = j-TKDD, volume = "17", number = "9", pages = "124:1--124:??", month = nov, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3596497", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Aug 19 07:15:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3596497", abstract = "Recent years have witnessed a vastly increasing popularity of location-based social networks (LBSNs), which facilitates studies on the next Point-of-Interest (POI) recommendation problem. A user's POI visiting behavior shows the sequential transition. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "124", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Jiang:2023:MLS, author = "Shaowei Jiang and Wei He and Lizhen Cui and Yonghui Xu and Lei Liu", title = "Modeling Long- and Short-Term User Preferences via Self-Supervised Learning for Next {POI} Recommendation", journal = j-TKDD, volume = "17", number = "9", pages = "125:1--125:??", month = nov, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3597211", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Aug 19 07:15:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3597211", abstract = "With the accumulation of check-in data from location-based services, next Point-of-Interest (POI) recommendations are gaining increasing attention. It is well known that the spatio-temporal contextual information of user check-in behavior plays a crucial \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "125", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Jang:2023:AOS, author = "Jun-Gi Jang and Sooyeon Shim and Vladimir Egay and Jeeyong Lee and Jongmin Park and Suhyun Chae and U. Kang", title = "Accurate Open-Set Recognition for Memory Workload", journal = j-TKDD, volume = "17", number = "9", pages = "126:1--126:??", month = nov, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3597027", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Aug 19 07:15:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3597027", abstract = "How can we accurately identify new memory workloads while classifying known memory workloads? 
Verifying DRAM (Dynamic Random Access Memory) using various workloads is an important task to guarantee the quality of DRAM. A crucial component in the process \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "126", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Qin:2023:TBT, author = "Meng Qin and Chaorui Zhang and Bo Bai and Gong Zhang and Dit-Yan Yeung", title = "Towards a Better Tradeoff between Quality and Efficiency of Community Detection: an Inductive Embedding Method across Graphs", journal = j-TKDD, volume = "17", number = "9", pages = "127:1--127:??", month = nov, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3596605", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Aug 19 07:15:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3596605", abstract = "Many network applications can be formulated as NP-hard combinatorial optimization problems of community detection (CD) that partitions nodes of a graph into several groups with dense linkage. Most existing CD methods are transductive, which independently \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "127", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chen:2023:LRR, author = "Haoran Chen and Xu Chen and Hongwei Tao and Zuhe Li and Xiao Wang", title = "Low-rank Representation with Adaptive Dimensionality Reduction via Manifold Optimization for Clustering", journal = j-TKDD, volume = "17", number = "9", pages = "128:1--128:??", month = nov, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3589767", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Aug 19 07:15:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3589767", abstract = "The dimensionality reduction techniques are often used to reduce data dimensionality for computational efficiency or other purposes in existing low-rank representation (LRR)-based methods. However, the two steps of dimensionality reduction and learning \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "128", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2023:TTP, author = "Ye Liu and Han Wu and Zhenya Huang and Hao Wang and Yuting Ning and Jianhui Ma and Qi Liu and Enhong Chen", title = "{TechPat}: Technical Phrase Extraction for Patent Mining", journal = j-TKDD, volume = "17", number = "9", pages = "129:1--129:??", month = nov, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3596603", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Aug 19 07:15:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3596603", abstract = "In recent years, due to the explosive growth of patent applications, patent mining has drawn extensive attention and interest. 
An important issue of patent mining is that of recognizing the technologies contained in patents, which serves as a fundamental \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "129", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2023:MRA, author = "Chunyang Wang and Yanmin Zhu and Haobing Liu and Tianzi Zang and Ke Wang and Jiadi Yu", title = "Multifaceted Relation-aware Meta-learning with Dual Customization for User Cold-start Recommendation", journal = j-TKDD, volume = "17", number = "9", pages = "130:1--130:??", month = nov, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3597458", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Aug 19 07:15:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3597458", abstract = "User cold-start scenarios pose great challenges to recommendation systems in accurately capturing user preferences with sparse interaction records. Besides incorporating auxiliary information to enrich user/item representations, recent studies under the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "130", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yu:2023:ITB, author = "Zhiwen Yu and Minling Dang and Qilong Wu and Liming Chen and Yujin Xie and Yu Wang and Bin Guo", title = "An Information Theory Based Method for Quantifying the Predictability of Human Mobility", journal = j-TKDD, volume = "17", number = "9", pages = "131:1--131:??", month = nov, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3597500", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Aug 19 07:15:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3597500", abstract = "Research on human mobility drives the development of economy and society. How to predict when and where one will go accurately is one of the core research questions. Existing work is mainly concerned with performance of mobility prediction models. Since \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "131", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ramezani:2023:JID, author = "Maryam Ramezani and Aryan Ahadinia and Amirmohammad Ziaei Bideh and Hamid R. Rabiee", title = "Joint Inference of Diffusion and Structure in Partially Observed Social Networks Using Coupled Matrix Factorization", journal = j-TKDD, volume = "17", number = "9", pages = "132:1--132:??", month = nov, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3599237", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Aug 19 07:15:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3599237", abstract = "Access to complete data in large-scale networks is often infeasible. 
Therefore, the problem of missing data is a crucial and unavoidable issue in the analysis and modeling of real-world social networks. However, most of the research on different aspects \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "132", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2023:SIM, author = "Yandi Li and Haobo Gao and Yunxuan Gao and Jianxiong Guo and Weili Wu", title = "A Survey on Influence Maximization: From an {ML}-Based Combinatorial Optimization", journal = j-TKDD, volume = "17", number = "9", pages = "133:1--133:??", month = nov, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3604559", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Aug 19 07:15:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3604559", abstract = "Influence Maximization (IM) is a classical combinatorial optimization problem, which can be widely used in mobile networks, social computing, and recommendation systems. It aims at selecting a small number of users such that maximizing the influence \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "133", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2023:MLF, author = "Zan Zhang and Zhe Zhang and Jialu Yao and Lin Liu and Jiuyong Li and Gongqing Wu and Xindong Wu", title = "Multi-Label Feature Selection Via Adaptive Label Correlation Estimation", journal = j-TKDD, volume = "17", number = "9", pages = "134:1--134:??", month = nov, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3604560", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Aug 19 07:15:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3604560", abstract = "In multi-label learning, each instance is associated with multiple labels simultaneously. Multi-label data often have noisy, irrelevant, and redundant features of high dimensionality. Multi-label feature selection has received considerable attention as an \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "134", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ren:2023:CCG, author = "Siyuan Ren and Bin Guo and Ke Li and Qianru Wang and Qinfen Wang and Zhiwen Yu", title = "{CoupledGT}: Coupled Geospatial-temporal Data Modeling for Air Quality Prediction", journal = j-TKDD, volume = "17", number = "9", pages = "135:1--135:??", month = nov, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3604616", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Aug 19 07:15:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3604616", abstract = "Air pollution seriously affects public health, while effective air quality prediction remains a challenging problem since the complex spatial-temporal couplings exist in multi-area monitoring data of the city. Current approaches rarely consider relative \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "135", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2024:HHG, author = "Youru Li and Zhenfeng Zhu and Xiaobo Guo and Shaoshuai Li and Yuchen Yang and Yao Zhao", title = "{HGV4Risk}: Hierarchical Global View-guided Sequence Representation Learning for Risk Prediction", journal = j-TKDD, volume = "18", number = "1", pages = "1:1--1:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3605895", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:43 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3605895", abstract = "Risk prediction, usually achieved by learning representations from patient's physiological sequence or user's behavioral sequence data, and has been widely applied in healthcare and finance. Despite that, some recent time-aware deep learning methods have \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "1", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Amagata:2024:EDP, author = "Daichi Amagata and Takahiro Hara", title = "Efficient Density-peaks Clustering Algorithms on Static and Dynamic Data in {Euclidean} Space", journal = j-TKDD, volume = "18", number = "1", pages = "2:1--2:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3607873", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:43 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3607873", abstract = "Clustering multi-dimensional points is a fundamental task in many fields, and density-based clustering supports many applications because it can discover clusters of arbitrary shapes. 
This article addresses the problem of Density-Peaks Clustering (DPC) in \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "2", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Deng:2024:TNF, author = "Jiewen Deng and Jinliang Deng and Du Yin and Renhe Jiang and Xuan Song", title = "{TTS-Norm}: Forecasting Tensor Time Series via Multi-Way Normalization", journal = j-TKDD, volume = "18", number = "1", pages = "3:1--3:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3605894", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:43 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3605894", abstract = "Tensor time series (TTS) data, a generalization of one-dimensional time series on a high-dimensional space, is ubiquitous in real-world applications. Compared to modeling time series or multivariate time series, which has received much attention and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "3", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Moreo:2024:MLQ, author = "Alejandro Moreo and Manuel Francisco and Fabrizio Sebastiani", title = "Multi-Label Quantification", journal = j-TKDD, volume = "18", number = "1", pages = "4:1--4:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3606264", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:43 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3606264", abstract = "Quantification, variously called supervised prevalence estimation or learning to quantify, is the supervised learning task of generating predictors of the relative frequencies (a.k.a. prevalence values) of the classes of interest in unlabelled data \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "4", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2024:HSF, author = "Chunkai Zhang and Yuting Yang and Zilin Du and Wensheng Gan and Philip S. Yu", title = "{HUSP-SP}: Faster Utility Mining on Sequence Data", journal = j-TKDD, volume = "18", number = "1", pages = "5:1--5:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3597935", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:43 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3597935", abstract = "High-utility sequential pattern mining (HUSPM) has emerged as an important topic due to its wide application and considerable popularity.
However, due to the combinatorial explosion of the search space when the HUSPM problem encounters a low-utility \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "5", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chen:2024:MVG, author = "Zhaoliang Chen and Lele Fu and Shunxin Xiao and Shiping Wang and Claudia Plant and Wenzhong Guo", title = "Multi-View Graph Convolutional Networks with Differentiable Node Selection", journal = j-TKDD, volume = "18", number = "1", pages = "6:1--6:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3608954", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:43 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3608954", abstract = "Multi-view data containing complementary and consensus information can facilitate representation learning by exploiting the intact integration of multi-view features. Because most objects in the real world often have underlying connections, organizing \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "6", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Luo:2024:DLC, author = "Fangyuan Luo and Jun Wu and Tao Wang", title = "Discrete Listwise Content-aware Recommendation", journal = j-TKDD, volume = "18", number = "1", pages = "7:1--7:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3609334", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:43 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3609334", abstract = "To perform online inference efficiently, hashing techniques, devoted to encoding model parameters as binary codes, play a key role in reducing the computational cost of content-aware recommendation (CAR), particularly on devices with limited computation \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "7", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2024:SGC, author = "Huiyuan Li and Li Yu and Xi Niu and Youfang Leng and Qihan Du", title = "Sequential and Graphical Cross-Domain Recommendations with a Multi-View Hierarchical Transfer Gate", journal = j-TKDD, volume = "18", number = "1", pages = "8:1--8:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3604615", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:43 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3604615", abstract = "Cross-domain recommender systems could potentially improve the recommendation performance by means of transferring abundant knowledge from the auxiliary domain to the target domain. 
They could help address some key challenges in recommender systems, such \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "8", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Mai:2024:SCC, author = "Weiming Mai and Jiangchao Yao and Gong Chen and Ya Zhang and Yiu-Ming Cheung and Bo Han", title = "Server-Client Collaborative Distillation for Federated Reinforcement Learning", journal = j-TKDD, volume = "18", number = "1", pages = "9:1--9:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3604939", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:43 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3604939", abstract = "Federated Learning (FL) learns a global model in a distributional manner, which does not require local clients to share private data. Such merit has drawn lots of attention in the interaction scenarios, where Federated Reinforcement Learning (FRL) emerges \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "9", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wu:2024:FRS, author = "Yao Wu and Jian Cao and Guandong Xu", title = "Fairness in Recommender Systems: Evaluation Approaches and Assurance Strategies", journal = j-TKDD, volume = "18", number = "1", pages = "10:1--10:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3604558", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:43 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3604558", abstract = "With the wide application of recommender systems, the potential impacts of recommender systems on customers, item providers and other parties have attracted increasing attention. Fairness, which is the quality of treating people equally, is also becoming \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "10", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2024:TTD, author = "Huan Wang and Guoquan Liu and Po Hu", title = "{TDAN}: Transferable Domain Adversarial Network for Link Prediction in Heterogeneous Social Networks", journal = j-TKDD, volume = "18", number = "1", pages = "11:1--11:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3610229", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:43 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3610229", abstract = "Link prediction has received increased attention in social network analysis. 
One of the unique challenges in heterogeneous social networks is link prediction in new link types without verified link information, such as recommending products to new \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "11", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Corbara:2024:SDD, author = "Silvia Corbara and Alejandro Moreo and Fabrizio Sebastiani", title = "Same or Different? {Diff}-Vectors for Authorship Analysis", journal = j-TKDD, volume = "18", number = "1", pages = "12:1--12:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3609226", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:43 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3609226", abstract = "In this article, we investigate the effects on authorship identification tasks (including authorship verification, closed-set authorship attribution, and closed-set and open-set same-author verification) of a fundamental shift in how to conceive the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "12", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Huang:2024:RRH, author = "Jincheng Huang and Ping Li and Rui Huang and Na Chen and Acong Zhang", title = "Revisiting the Role of Heterophily in Graph Representation Learning: an Edge Classification Perspective", journal = j-TKDD, volume = "18", number = "1", pages = "13:1--13:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3603378", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:43 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3603378", abstract = "Graph representation learning aims at integrating node contents with graph structure to learn nodes/graph representations. Nevertheless, it is found that many existing graph learning methods do not work well on data with high heterophily level that \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "13", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Luo:2024:CBH, author = "Xiao Luo and Daqing Wu and Yiyang Gu and Chong Chen and Luchen Liu and Jinwen Ma and Ming Zhang and Minghua Deng and Jianqiang Huang and Xian-Sheng Hua", title = "Criterion-based Heterogeneous Collaborative Filtering for Multi-behavior Implicit Recommendation", journal = j-TKDD, volume = "18", number = "1", pages = "14:1--14:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3611310", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:43 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3611310", abstract = "Recent years have witnessed the explosive growth of interaction behaviors in multimedia information systems, where multi-behavior recommender systems have received increasing attention by leveraging data from various auxiliary behaviors such as tip and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "14", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2024:DDP, author = "Huiting Liu and Yu Zhang and Peipei Li and Cheng Qian and Peng Zhao and Xindong Wu", title = "{DeepCPR}: Deep Path Reasoning Using Sequence of User-Preferred Attributes for Conversational Recommendation", journal = j-TKDD, volume = "18", number = "1", pages = "15:1--15:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3610775", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:43 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3610775", abstract = "Conversational recommender systems (CRS) have garnered significant attention in academia and industry because of their ability to capture user preferences via system questions and user responses. Typically, in a CRS, reinforcement learning (RL) is \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "15", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2024:DAD, author = "Qiuyue Zhang and Yunfeng Zhang and Xunxiang Yao and Shilong Li and Caiming Zhang and Peide Liu", title = "A Dynamic Attributes-driven Graph Attention Network Modeling on Behavioral Finance for Stock Prediction", journal = j-TKDD, volume = "18", number = "1", pages = "16:1--16:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3611311", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:43 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3611311", abstract = "Stock prediction is a challenging task due to multiple influencing factors and complex market dependencies. 
Traditional solutions are based on a single type of information. With the success of multi-source information in different fields, the combination \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "16", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chowdhury:2024:INC, author = "Anjan Chowdhury and Sriram Srinivasan and Animesh Mukherjee and Sanjukta Bhowmick and Kuntal Ghosh", title = "Improving Node Classification Accuracy of {GNN} through Input and Output Intervention", journal = j-TKDD, volume = "18", number = "1", pages = "17:1--17:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3610535", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:43 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3610535", abstract = "Graph Neural Networks (GNNs) are a popular machine learning framework for solving various graph processing applications. This framework exploits both the graph topology and the feature vectors of the nodes. One of the important applications of GNN is in \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "17", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chen:2024:SSD, author = "Ke-Jia Chen and Linsong Liu and Linpu Jiang and Jingqiang Chen", title = "Self-Supervised Dynamic Graph Representation Learning via Temporal Subgraph Contrast", journal = j-TKDD, volume = "18", number = "1", pages = "18:1--18:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3612931", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:43 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3612931", abstract = "Self-supervised learning on graphs has recently drawn a lot of attention due to its independence from labels and its robustness in representation. Current studies on this topic mainly use static information such as graph structures but cannot well capture \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "18", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Sun:2024:LBC, author = "Yan Sun and Yi Han and Jicong Fan", title = "{Laplacian}-based Cluster-Contractive $t$-{SNE} for High-Dimensional Data Visualization", journal = j-TKDD, volume = "18", number = "1", pages = "19:1--19:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3612932", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:43 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3612932", abstract = "Dimensionality reduction techniques aim at representing high-dimensional data in low-dimensional spaces to extract hidden and useful information or facilitate visual understanding and interpretation of the data. 
However, few of them take into \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "19", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ge:2024:DCC, author = "Yong-Feng Ge and Elisa Bertino and Hua Wang and Jinli Cao and Yanchun Zhang", title = "Distributed Cooperative Coevolution of Data Publishing Privacy and Transparency", journal = j-TKDD, volume = "18", number = "1", pages = "20:1--20:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3613962", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:43 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3613962", abstract = "Data transparency is beneficial to data participants' awareness, users' fairness, and research work's reproducibility. However, when addressing transparency requirements, we cannot ignore data privacy. This article defines the multi-objective data \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "20", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Strukova:2024:AKI, author = "Sofia Strukova and Jos{\'e} A. 
Ruip{\'e}rez-Valiente and F{\'e}lix G{\'o}mez M{\'a}rmol", title = "Adapting Knowledge Inference Algorithms to Measure Geometry Competencies through a Puzzle Game", journal = j-TKDD, volume = "18", number = "1", pages = "21:1--21:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3614436", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:43 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3614436", abstract = "The rapid technological evolution of the last years has motivated students to develop capabilities that will prepare them for an unknown future in the 21st century. In this context, many teachers intend to optimise the learning process, making it more \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "21", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2024:ETL, author = "Bo Liu and Liangjiao Li and Yanshan Xiao and Kai Wang and Jian Hu and Junrui Liu and Qihang Chen and Ruiguang Huang", title = "An Efficient Transfer Learning Method with Auxiliary Information", journal = j-TKDD, volume = "18", number = "1", pages = "22:1--22:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3612930", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:43 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3612930", abstract = "Transfer learning (TL) is an information reuse learning tool, which can help us learn better classification effect than traditional single task learning, because transfer learning can share information within the task-to-task model. Most TL algorithms are \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "22", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2024:SEA, author = "Zhong Li and Yuxuan Zhu and Matthijs {Van Leeuwen}", title = "A Survey on Explainable Anomaly Detection", journal = j-TKDD, volume = "18", number = "1", pages = "23:1--23:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3609333", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:43 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3609333", abstract = "In the past two decades, most research on anomaly detection has focused on improving the accuracy of the detection, while largely ignoring the explainability of the corresponding methods and thus leaving the explanation of outcomes to practitioners. As \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "23", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Jiang:2024:TLA, author = "Meng Jiang", title = "Transfer Learning across Graph Convolutional Networks: Methods, Theory, and Applications", journal = j-TKDD, volume = "18", number = "1", pages = "24:1--24:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3617376", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:43 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3617376", abstract = "Graph neural networks have been widely used for learning representations of nodes for many downstream tasks on graph data. Existing models were designed for the nodes on a single graph, which would not be able to utilize information across multiple \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Knowl. Discov. Data", articleno = "24", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2024:CQM, author = "Lizhen Wang and Vanha Tran and Thanhcong Do", title = "A Clique-Querying Mining Framework for Discovering High Utility Co-Location Patterns without Generating Candidates", journal = j-TKDD, volume = "18", number = "1", pages = "25:1--25:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3617378", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:43 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3617378", abstract = "Groups of spatial features whose instances frequently appear together in nearby areas are regarded as prevalent co-location patterns (PCPs). Traditional PCP mining ignores the significance of instances and features. However, in reality, these instances \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "25", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Fu:2024:MUC, author = "Zhe Fu and Xi Niu", title = "Modeling Users' Curiosity in Recommender Systems", journal = j-TKDD, volume = "18", number = "1", pages = "26:1--26:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3617598", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:43 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3617598", abstract = "Today's recommender systems are criticized for recommending items that are too obvious to arouse users' interests. 
Therefore, the research community has advocated some ``beyond accuracy'' evaluation metrics such as novelty, diversity, and serendipity with \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "26", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2024:OGC, author = "Hui-Jia Li and Yuhao Feng and Chengyi Xia and Jie Cao", title = "Overlapping Graph Clustering in Attributed Networks via Generalized Cluster Potential Game", journal = j-TKDD, volume = "18", number = "1", pages = "27:1--27:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3597436", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:43 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3597436", abstract = "Overlapping graph clustering is essential to understand the nature and behavior of real complex systems including human interactions, technical systems and transportation network. However, in addition of topological structure, many real-world networked \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "27", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2024:UKG, author = "Yu Liu and Zhilun Zhou and Yong Li and Depeng Jin", title = "Urban Knowledge Graph Aided Mobile User Profiling", journal = j-TKDD, volume = "18", number = "1", pages = "28:1--28:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3596604", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:43 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3596604", abstract = "Nowadays, the explosive growth of personalized web applications and the rapid development of artificial intelligence technology have flourished the recent research on mobile user profiling, i.e., inferring the user profile from mobile behavioral data. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "28", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Kaibiao:2024:ANG, author = "Kaibiao Lin and Jinpo Chen and Ruicong Chen and Fan Yang and Yang Zhang and Min Lin and Ping Lu", title = "Adaptive Neighbor Graph Aggregated Graph Attention Network for Heterogeneous Graph Embedding", journal = j-TKDD, volume = "18", number = "1", pages = "29:1--29:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3616377", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:43 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3616377", abstract = "Graph attention network can generate effective feature embedding by specifying different weights to different nodes. 
The key of the research on heterogeneous graph embedding is the way to combine its rich structural information with semantic relations to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "29", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2024:MMG, author = "Yashen Wang and Xiaoye Ouyang and Dayu Guo and Xiaoling Zhu", title = "{MEGA}: Meta-Graph Augmented Pre-Training Model for Knowledge Graph Completion", journal = j-TKDD, volume = "18", number = "1", pages = "30:1--30:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3617379", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:43 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3617379", abstract = "Nowadays, a large number of Knowledge Graph Completion (KGC) methods have been proposed by using embedding based manners, to overcome the incompleteness problem faced with knowledge graph (KG). One important recent innovation in Natural Language \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "30", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2023:SDR, author = "Xiang Wang and Liping Jing and Huafeng Liu and Jian Yu", title = "Structure-Driven Representation Learning for Deep Clustering", journal = j-TKDD, volume = "18", number = "1", publisher = "Association for Computing Machinery (ACM)", pages = "31:1--31:25", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3623400", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:07:57 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3623400", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "31", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Alam:2023:DIP, author = "Md. Tanvir Alam and Chowdhury Farhan Ahmed and Md. Samiullah and Carson Kai-Sang Leung", title = "Discovering Interesting Patterns from Hypergraphs", journal = j-TKDD, volume = "18", number = "1", publisher = "Association for Computing Machinery (ACM)", pages = "32:1--32:34", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3622940", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:07:57 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3622940", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "32", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2024:ECB, author = "Fangfang Li and Zhi Liu and Junwen Duan and Xingliang Mao and Heyuan Shi and Shichao Zhang", title = "Exploiting Conversation-Branch-Tweet {HyperGraph} Structure to Detect Misinformation on Social Media", journal = j-TKDD, volume = "18", number = "2", pages = "33:1--33:??", month = feb, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3610297", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon 
Jan 15 11:01:46 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3610297", abstract = "The spread of misinformation on social media is a serious issue that can have negative consequences for public health and political stability. While detecting and identifying misinformation can be challenging, many attempts have been made to address this \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "33", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Luo:2024:SSG, author = "Xiao Luo and Wei Ju and Yiyang Gu and Zhengyang Mao and Luchen Liu and Yuhui Yuan and Ming Zhang", title = "Self-supervised Graph-level Representation Learning with Adversarial Contrastive Learning", journal = j-TKDD, volume = "18", number = "2", pages = "34:1--34:??", month = feb, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3624018", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:46 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3624018", abstract = "The recently developed unsupervised graph representation learning approaches apply contrastive learning into graph-structured data and achieve promising performance. However, these methods mainly focus on graph augmentation for positive samples, while the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "34", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Miller:2024:ASP, author = "Benjamin A. 
Miller and Zohair Shafi and Wheeler Ruml and Yevgeniy Vorobeychik and Tina Eliassi-Rad and Scott Alfeld", title = "Attacking Shortest Paths by Cutting Edges", journal = j-TKDD, volume = "18", number = "2", pages = "35:1--35:??", month = feb, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3622941", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:46 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3622941", abstract = "Identifying shortest paths between nodes in a network is a common graph analysis problem that is important for many applications involving routing of resources. An adversary that can manipulate the graph structure could alter traffic patterns to gain some \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "35", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Spinnato:2024:UTS, author = "Francesco Spinnato and Riccardo Guidotti and Anna Monreale and Mirco Nanni and Dino Pedreschi and Fosca Giannotti", title = "Understanding Any Time Series Classifier with a Subsequence-based Explainer", journal = j-TKDD, volume = "18", number = "2", pages = "36:1--36:??", month = feb, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3624480", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:46 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3624480", abstract = "The growing availability of time series data has increased the usage of classifiers for this data type. Unfortunately, state-of-the-art time series classifiers are black-box models and, therefore, not usable in critical domains such as healthcare or \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "36", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yu:2024:FSE, author = "Kui Yu and Zhaolong Ling and Lin Liu and Peipei Li and Hao Wang and Jiuyong Li", title = "Feature Selection for Efficient Local-to-global {Bayesian} Network Structure Learning", journal = j-TKDD, volume = "18", number = "2", pages = "37:1--37:??", month = feb, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3624479", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:46 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3624479", abstract = "Local-to-global learning approach plays an essential role in Bayesian network (BN) structure learning. Existing local-to-global learning algorithms first construct the skeleton of a DAG (directed acyclic graph) by learning the MB (Markov blanket) or PC \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "37", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Cai:2024:RDS, author = "Ruichu Cai and Fengzhu Wu and Zijian Li and Jie Qiao and Wei Chen and Yuexing Hao and Hao Gu", title = "{REST}: Debiased Social Recommendation via Reconstructing Exposure Strategies", journal = j-TKDD, volume = "18", number = "2", pages = "38:1--38:??", month = feb, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3624986", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:46 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3624986", abstract = "The recommendation system, relying on historical observational data to model the complex relationships among users and items, has achieved great success in real-world applications. Selection bias is one of the most important issues of the existing \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "38", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ye:2024:MDF, author = "Xiaoqing Ye and Yang Sun and Dun Liu and Tianrui Li", title = "A Multisource Data Fusion-based Heterogeneous Graph Attention Network for Competitor Prediction", journal = j-TKDD, volume = "18", number = "2", pages = "39:1--39:??", month = feb, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3625101", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:46 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3625101", abstract = "Competitor identification is an essential component of corporate strategy. 
With the rapid development of artificial intelligence, various data-mining methodologies and frameworks have emerged to identify competitors. In general, the competitiveness among \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "39", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2024:FPP, author = "Taolin Zhang and Chengyuan Mai and Yaomin Chang and Chuan Chen and Lin Shu and Zibin Zheng", title = "{FedEgo}: Privacy-preserving Personalized Federated Graph Learning with Ego-graphs", journal = j-TKDD, volume = "18", number = "2", pages = "40:1--40:??", month = feb, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3624017", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:46 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3624017", abstract = "As special information carriers containing both structure and feature information, graphs are widely used in graph mining, e.g., Graph Neural Networks (GNNs). However, graph data are stored separately in multiple distributed parties in some practical \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "40", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Lu:2024:CTT, author = "Xiangkui Lu and Jun Wu and Junheng Huang and Fangyuan Luo and Jianbo Yuan", title = "Co-Training-Teaching: a Robust Semi-Supervised Framework for Review-Aware Rating Regression", journal = j-TKDD, volume = "18", number = "2", pages = "41:1--41:??", month = feb, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3625391", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:46 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3625391", abstract = "Review-aware Rating Regression (RaRR) suffers the severe challenge of extreme data sparsity as the multi-modality interactions of ratings accompanied by reviews are costly to obtain. Although some studies of semi-supervised rating regression are proposed \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "41", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zareie:2024:MDE, author = "Ahmad Zareie and Rizos Sakellariou", title = "Maximizing the Diversity of Exposure in Online Social Networks by Identifying Users with Increased Susceptibility to Persuasion", journal = j-TKDD, volume = "18", number = "2", pages = "42:1--42:??", month = feb, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3625826", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:46 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3625826", abstract = "Individuals may have a range of opinions on controversial topics. 
However, the ease of making friendships in online social networks tends to create groups of like-minded individuals, who propagate messages that reinforce existing opinions and ignore \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "42", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Xu:2024:OWG, author = "Hui Xu and Liyao Xiang and Junjie Ou and Yuting Weng and Xinbing Wang and Chenghu Zhou", title = "Open-World Graph Active Learning for Node Classification", journal = j-TKDD, volume = "18", number = "2", pages = "43:1--43:??", month = feb, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3607144", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:46 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3607144", abstract = "The great power of Graph Neural Networks (GNNs) relies on a large number of labeled training data, but obtaining the labels can be costly in many cases. Graph Active Learning (GAL) is proposed to reduce such annotation costs, but the existing methods \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "43", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chen:2024:SSH, author = "Ying Chen and Siwei Qiang and Mingming Ha and Xiaolei Liu and Shaoshuai Li and Jiabi Tong and Lingfeng Yuan and Xiaobo Guo and Zhenfeng Zhu", title = "Semi-Supervised Heterogeneous Graph Learning with Multi-Level Data Augmentation", journal = j-TKDD, volume = "18", number = "2", pages = "44:1--44:??", month = feb, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3608953", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:46 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3608953", abstract = "In recent years, semi-supervised graph learning with data augmentation (DA) has been the most commonly used and best-performing method to improve model robustness in sparse scenarios with few labeled samples. However, most existing DA methods are based on \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "44", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2024:RET, author = "Huan Wang and Ruigang Liu and Chuanqi Shi and Junyang Chen and Lei Fang and Shun Liu and Zhiguo Gong", title = "Resisting the Edge-Type Disturbance for Link Prediction in Heterogeneous Networks", journal = j-TKDD, volume = "18", number = "2", pages = "45:1--45:??", month = feb, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3614099", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:46 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3614099", abstract = "The rapid development of heterogeneous networks has proposed new challenges to the long-standing link prediction problem. Existing models trained on the verified edge samples from different types usually learn type-specific knowledge, and their type- \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "45", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2024:ASG, author = "Xiaoting Li and Lingwei Chen and Dinghao Wu", title = "Adversary for Social Good: Leveraging Adversarial Attacks to Protect Personal Attribute Privacy", journal = j-TKDD, volume = "18", number = "2", pages = "46:1--46:??", month = feb, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3614098", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:46 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3614098", abstract = "Social media has drastically reshaped the world that allows billions of people to engage in such interactive environments to conveniently create and share content with the public.
Among them, text data (e.g., tweets, blogs) maintains the basic yet \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "46", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yan:2024:GMC, author = "Bo Yan and Cheng Yang and Chuan Shi and Yong Fang and Qi Li and Yanfang Ye and Junping Du", title = "Graph Mining for Cybersecurity: a Survey", journal = j-TKDD, volume = "18", number = "2", pages = "47:1--47:??", month = feb, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3610228", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:46 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3610228", abstract = "The explosive growth of cyber attacks today, such as malware, spam, and intrusions, has caused severe consequences on society. Securing cyberspace has become a great concern for organizations and governments. Traditional machine learning based methods are \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "47", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Huang:2024:MII, author = "Qiang Huang and Jing Ma and Jundong Li and Ruocheng Guo and Huiyan Sun and Yi Chang", title = "Modeling Interference for Individual Treatment Effect Estimation from Networked Observational Data", journal = j-TKDD, volume = "18", number = "3", pages = "48:1--48:??", month = apr, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3628449", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:47 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3628449", abstract = "Estimating individual treatment effect (ITE) from observational data has attracted great interest in recent years, which plays a crucial role in decision-making across many high-impact domains such as economics, medicine, and e-commerce. Most existing \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "48", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Gonzalez-Zelaya:2024:FPD, author = "Vladimiro Gonz{\'a}lez-Zelaya and Juli{\'a}n Salas and David Meg{\'\i}as and Paolo Missier", title = "Fair and Private Data Preprocessing through Microaggregation", journal = j-TKDD, volume = "18", number = "3", pages = "49:1--49:??", month = apr, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3617377", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:47 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3617377", abstract = "Privacy protection for personal data and fairness in automated decisions are fundamental requirements for responsible Machine Learning. 
Both may be enforced through data preprocessing and share a common target: data should remain useful for a task, while \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "49", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Cheng:2024:AFS, author = "Ling Cheng and Feida Zhu and Yong Wang and Ruicheng Liang and Huiwen Liu", title = "From Asset Flow to Status, Action, and Intention Discovery: Early Malice Detection in Cryptocurrency", journal = j-TKDD, volume = "18", number = "3", pages = "50:1--50:??", month = apr, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3626102", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:47 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3626102", abstract = "Cryptocurrency has been subject to illicit activities probably more often than traditional financial assets due to the pseudo-anonymous nature of its transacting entities. An ideal detection model is expected to achieve all three critical properties of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "50", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Huang:2024:EEF, author = "Yimin Huang and Wanwan Wang and Xingying Zhao and Yukun Wang and Xinyu Feng and Hao He and Ming Yao", title = "{EFMVFL}: an Efficient and Flexible Multi-party Vertical Federated Learning without a Third Party", journal = j-TKDD, volume = "18", number = "3", pages = "51:1--51:??", month = apr, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3627993", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:47 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3627993", abstract = "Federated learning (FL) is a machine learning setting which allows multiple participants collaboratively to train a model under the orchestration of a server without disclosing their local data. Vertical federated learning (VFL) is a special structure in \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "51", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Pellegrina:2024:SEB, author = "Leonardo Pellegrina and Fabio Vandin", title = "{SILVAN}: Estimating Betweenness Centralities with Progressive Sampling and Non-uniform {Rademacher} Bounds", journal = j-TKDD, volume = "18", number = "3", pages = "52:1--52:??", month = apr, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3628601", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:47 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3628601", abstract = "``Sim Sala Bim!'' ---Silvan. Betweenness centrality is a popular centrality measure with applications in several domains and whose exact computation is impractical for modern-sized networks. We present SILVAN, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "52", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Xia:2024:HEU, author = "Tong Xia and Yong Li and Yunhan Qi and Jie Feng and Fengli Xu and Funing Sun and Diansheng Guo and Depeng Jin", title = "History-enhanced and Uncertainty-aware Trajectory Recovery via Attentive Neural Network", journal = j-TKDD, volume = "18", number = "3", pages = "53:1--53:??", month = apr, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3615660", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:47 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3615660", abstract = "A considerable amount of mobility data has been accumulated due to the proliferation of location-based services.
Nevertheless, compared with mobility data from transportation systems like the GPS module in taxis, this kind of data is commonly sparse in \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "53", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ao:2024:PYV, author = "Xiang Ao and Ling Luo and Xiting Wang and Zhao Yang and Jiun-Hung Chen and Ying Qiao and Qing He and Xing Xie", title = "Put Your Voice on Stage: Personalized Headline Generation for News Articles", journal = j-TKDD, volume = "18", number = "3", pages = "54:1--54:??", month = apr, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3629168", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:47 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3629168", abstract = "In this article, we study the problem of personalized news headline generation, which aims to produce not only concise and fact-consistent titles for news articles but also decorate these titles as personalized irresistible reading invitations by \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "54", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chen:2024:GAG, author = "Ling Chen and Jiahui Xu and Binqing Wu and Jianlong Huang", title = "Group-Aware Graph Neural Network for Nationwide City Air Quality Forecasting", journal = j-TKDD, volume = "18", number = "3", pages = "55:1--55:??", month = apr, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3631713", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:47 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3631713", abstract = "The problem of air pollution threatens public health. Air quality forecasting can provide the air quality index hours or even days later, which can help the public to prevent air pollution in advance. Previous works focus on citywide air quality \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "55", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liang:2024:LEI, author = "Yunji Liang and Lei Liu and Luwen Huangfu and Sagar Samtani and Zhiwen Yu and Daniel D. Zeng", title = "Learning Entangled Interactions of Complex Causality via Self-Paced Contrastive Learning", journal = j-TKDD, volume = "18", number = "3", pages = "56:1--56:??", month = apr, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3632406", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:47 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3632406", abstract = "Learning causality from large-scale text corpora is an important task with numerous applications---for example, in finance, biology, medicine, and scientific discovery.
Prior studies have focused mainly on simple causality, which only includes one cause- \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "56", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Hsu:2024:AAC, author = "Chi-Wei Hsu and Chiao-Ting Chen and Szu-Hao Huang", title = "Adaptive Adversarial Contrastive Learning for Cross-Domain Recommendation", journal = j-TKDD, volume = "18", number = "3", pages = "57:1--57:??", month = apr, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3630259", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:47 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3630259", abstract = "Graph-based cross-domain recommendations (CDRs) are useful for suggesting appropriate items because of their promising ability to extract features from user-item interactions and transfer knowledge across domains. Thus, the model can effectively alleviate \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov.
Data", articleno = "57", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ou:2024:SBO, author = "Weitong Ou and Bo Chen and Xinyi Dai and Weinan Zhang and Weiwen Liu and Ruiming Tang and Yong Yu", title = "A Survey on Bid Optimization in Real-Time Bidding Display Advertising", journal = j-TKDD, volume = "18", number = "3", pages = "58:1--58:??", month = apr, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3628603", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:47 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3628603", abstract = "Real-Time Bidding (RTB) is one of the most important forms of online advertising, where an auction is hosted in real time to sell the individual ad impression. How to design an automated bidding strategy in response to the dynamic auction environment is \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "58", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ni:2024:LOS, author = "Li Ni and Hefei Xu and Yiwen Zhang and Wenjian Luo and Yingying Huang and Victor S. Sheng", title = "Local Overlapping Spatial-aware Community Detection", journal = j-TKDD, volume = "18", number = "3", pages = "59:1--59:??", month = apr, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3634707", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:47 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3634707", abstract = "Local spatial-aware community detection refers to detecting a spatial-aware community for a given node using local information. 
A spatial-aware community means that nodes in the community are tightly connected in structure, and their locations are close \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "59", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Cai:2024:GDA, author = "Ruichu Cai and Fengzhu Wu and Zijian Li and Pengfei Wei and Lingling Yi and Kun Zhang", title = "Graph Domain Adaptation: a Generative View", journal = j-TKDD, volume = "18", number = "3", pages = "60:1--60:??", month = apr, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3631712", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:47 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3631712", abstract = "Recent years have witnessed tremendous interest in deep learning on graph-structured data. Due to the high cost of collecting labeled graph-structured data, domain adaptation is important to supervised graph learning tasks with limited samples. However, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "60", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Su:2024:CBF, author = "Cong Su and Guoxian Yu and Yongqing Zheng and Jun Wang and Zhengtian Wu and Xiangliang Zhang and Carlotta Domeniconi", title = "Causality-Based Fair Multiple Decision by Response Functions", journal = j-TKDD, volume = "18", number = "3", pages = "61:1--61:??", month = apr, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3632529", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:47 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3632529", abstract = "A recent trend of fair machine learning is to build a decision model subjected to causality-based fairness requirements, which concern with the causality between sensitive attributes and decisions. Almost all (if not all) solutions focus on a single fair \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "61", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Han:2024:LUB, author = "Di Han and Yifan Huang and Junmin Liu and Kai Liao and Kunling Lin", title = "{LSAB}: User Behavioral Pattern Modeling in Sequential Recommendation by Learning Self-Attention Bias", journal = j-TKDD, volume = "18", number = "3", pages = "62:1--62:??", month = apr, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3632625", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:47 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3632625", abstract = "Since the weight of a self-attention model is not affected by the sequence interval, it can more accurately and completely describe the user interests, so it is widely used in processing sequential recommendation. However, the mainstream self-attention \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "62", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Huang:2024:LCP, author = "Shenyang Huang and Samy Coulombe and Yasmeen Hitti and Reihaneh Rabbany and Guillaume Rabusseau", title = "{Laplacian} Change Point Detection for Single and Multi-view Dynamic Graphs", journal = j-TKDD, volume = "18", number = "3", pages = "63:1--63:??", month = apr, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3631609", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:47 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3631609", abstract = "Dynamic graphs are rich data structures that are used to model complex relationships between entities over time. 
In particular, anomaly detection in temporal graphs is crucial for many real-world applications such as intrusion identification in network \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "63", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Saha:2024:PPN, author = "Swapnil Saha and Hafiz Imtiaz", title = "Privacy-Preserving Non-Negative Matrix Factorization with Outliers", journal = j-TKDD, volume = "18", number = "3", pages = "64:1--64:??", month = apr, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3632961", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:47 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3632961", abstract = "Non-negative matrix factorization is a popular unsupervised machine learning algorithm for extracting meaningful features from inherently non-negative data. Such data often contain privacy-sensitive user information. Additionally, the dataset can contain \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "64", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yang:2024:SGI, author = "Ming-Chuan Yang and Guo-Wei Wong and Meng Chang Chen", title = "Sparse Grid Imputation Using Unpaired Imprecise Auxiliary Data: Theory and Application to {PM2.5} Estimation", journal = j-TKDD, volume = "18", number = "3", pages = "65:1--65:??", month = apr, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3634751", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:47 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3634751", abstract = "Sparse grid imputation (SGI) is a challenging problem, as its goal is to infer the values of the entire grid from a limited number of cells with values. Traditionally, the problem is solved using regression methods such as KNN and kriging, whereas in the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "65", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhao:2024:PAD, author = "Han Zhao and Xu Yang and Cheng Deng", title = "Parameter-Agnostic Deep Graph Clustering", journal = j-TKDD, volume = "18", number = "3", pages = "66:1--66:??", month = apr, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3633783", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:47 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3633783", abstract = "Deep graph clustering, efficiently dividing nodes into multiple disjoint clusters in an unsupervised manner, has become a crucial tool for analyzing ubiquitous graph data. 
Existing methods have acquired impressive clustering effects by optimizing the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "66", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2024:LHT, author = "Song Wang and Yushun Dong and Xiao Huang and Chen Chen and Jundong Li", title = "Learning Hierarchical Task Structures for Few-shot Graph Classification", journal = j-TKDD, volume = "18", number = "3", pages = "67:1--67:??", month = apr, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3635473", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:47 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3635473", abstract = "The problem of few-shot graph classification targets at assigning class labels for graph samples, where only limited labeled graphs are provided for each class. To solve the problem brought by label scarcity, recent studies have proposed to adopt the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "67", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Rong:2024:TST, author = "Huan Rong and Xin Yu and Tinghuai Ma and Victor S. 
Sheng and Yang Zhou and Mznah Al-Rodhaan", title = "Three-stage Transferable and Generative Crowdsourced Comment Integration Framework Based on Zero- and Few-shot Learning with Domain Distribution Alignment", journal = j-TKDD, volume = "18", number = "3", pages = "68:1--68:??", month = apr, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3636511", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:47 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3636511", abstract = "Online shopping has become a crucial way to encourage daily consumption, where the User-generated, or crowdsourced product comments, can offer a broad range of feedback on e-commerce products. As a result, integrating critical opinions or major attitudes \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "68", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{DiPalma:2024:EVS, author = "Luciano {Di Palma} and Yanlei Diao and Anna Liu", title = "Efficient Version Space Algorithms for Human-in-the-loop Model Development", journal = j-TKDD, volume = "18", number = "3", pages = "69:1--69:??", month = apr, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3637443", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:47 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3637443", abstract = "When active learning (AL) is applied to help users develop a model on a large dataset through interactively presenting data instances for labeling, existing AL techniques often suffer from two main drawbacks: First, to reach high accuracy they may require \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov.
Data", articleno = "69", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Tipirneni:2024:SSA, author = "Sindhu Tipirneni and Ming Zhu and Chandan K. Reddy", title = "{StructCoder}: Structure-Aware Transformer for Code Generation", journal = j-TKDD, volume = "18", number = "3", pages = "70:1--70:??", month = apr, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3636430", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:47 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3636430", abstract = "There has been a recent surge of interest in automating software engineering tasks using deep learning. This article addresses the problem of code generation, in which the goal is to generate target code given source code in a different language or a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "70", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Fan:2024:DDE, author = "Wei Fan and Yanjie Fu and Shun Zheng and Jiang Bian and Yuanchun Zhou and Hui Xiong", title = "{DEWP}: Deep Expansion Learning for Wind Power Forecasting", journal = j-TKDD, volume = "18", number = "3", pages = "71:1--71:??", month = apr, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3637552", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:47 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3637552", abstract = "Wind is one kind of high-efficient, environmentally-friendly, and cost-effective energy source. Wind power, as one of the largest renewable energy in the world, has been playing a more and more important role in supplying electricity. 
Though growing \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "71", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2024:EFC, author = "Zhe Liu and Sukumar Letchmunan", title = "Enhanced Fuzzy Clustering for Incomplete Instance with Evidence Combination", journal = j-TKDD, volume = "18", number = "3", pages = "72:1--72:??", month = apr, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3638061", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jan 15 11:01:47 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3638061", abstract = "Clustering incomplete instance is still a challenging task since missing values maybe make the cluster information ambiguous, leading to the uncertainty and imprecision in results. This article investigates an enhanced fuzzy clustering with evidence \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "72", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Jian:2024:SSS, author = "Meng Jian and Yulong Bai and Jingjing Guo and Lifang Wu", title = "Swarm Self-supervised Hypergraph Embedding for Recommendation", journal = j-TKDD, volume = "18", number = "4", pages = "73:1--73:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3638058", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:57 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3638058", abstract = "The information era brings both opportunities and challenges to information services. 
Confronting information overload, recommendation technology is dedicated to filtering personalized content to meet users' requirements. The extremely sparse interaction \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "73", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2024:PTQ, author = "Wentao Wang and Huifang Ma and Yan Zhao and Zhixin Li", title = "Pre-training Question Embeddings for Improving Knowledge Tracing with Self-supervised Bi-graph Co-contrastive Learning", journal = j-TKDD, volume = "18", number = "4", pages = "74:1--74:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3638055", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:57 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3638055", abstract = "Learning high-quality vector representations (aka. embeddings) of educational questions lies at the core of knowledge tracing (KT), which defines a task of estimating students' knowledge states by predicting the probability that they correctly answer \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "74", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Piao:2024:IHL, author = "Minghao Piao and Yi Sheng and Jinda Yan and Cheng Hao Jin", title = "Image Hash Layer Triggered {CNN} Framework for Wafer Map Failure Pattern Retrieval and Classification", journal = j-TKDD, volume = "18", number = "4", pages = "75:1--75:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3638053", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:57 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3638053", abstract = "Recently, deep learning methods are often used in wafer map failure pattern classification. CNN requires less feature engineering but still needs preprocessing, e.g., denoising and resizing. Denoising is used to improve the quality of the input data, and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "75", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Xiao:2024:TGW, author = "Meng Xiao and Dongjie Wang and Min Wu and Kunpeng Liu and Hui Xiong and Yuanchun Zhou and Yanjie Fu", title = "Traceable Group-Wise Self-Optimizing Feature Transformation Learning: a Dual Optimization Perspective", journal = j-TKDD, volume = "18", number = "4", pages = "76:1--76:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3638059", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:57 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3638059", abstract = "Feature transformation aims to reconstruct an effective representation space by mathematically refining the existing features. 
It serves as a pivotal approach to combat the curse of dimensionality, enhance model generalization, mitigate data sparsity, and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "76", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2024:GBT, author = "Ximing Li and Bing Wang and Yang Wang and Meng Wang", title = "Graph-based Text Classification by Contrastive Learning with Text-level Graph Augmentation", journal = j-TKDD, volume = "18", number = "4", pages = "77:1--77:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3638353", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:57 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3638353", abstract = "Text Classification (TC) is a fundamental task in the information retrieval community. Nowadays, the mainstay TC methods are built on the deep neural networks, which can learn much more discriminative text features than the traditional shallow learning \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "77", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2024:CMM, author = "Tengfei Liu and Yongli Hu and Junbin Gao and Yanfeng Sun and Baocai Yin", title = "Cross-modal Multiple Granularity Interactive Fusion Network for Long Document Classification", journal = j-TKDD, volume = "18", number = "4", pages = "78:1--78:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3631711", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:57 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3631711", abstract = "Long Document Classification (LDC) has attracted great attention in Natural Language Processing and achieved considerable progress owing to the large-scale pre-trained language models. In spite of this, as a different problem from the traditional text \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "78", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Bozdag:2024:MMG, author = "Mustafa Bozdag and Nurullah Sevim and Aykut Ko{\c{c}}", title = "Measuring and Mitigating Gender Bias in Legal Contextualized Language Models", journal = j-TKDD, volume = "18", number = "4", pages = "79:1--79:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3628602", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:57 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3628602", abstract = "Transformer-based contextualized language models constitute the state-of-the-art in several natural language processing (NLP) tasks and applications. 
Despite their utility, contextualized models can contain human-like social biases, as their training \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "79", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2024:TOS, author = "Chunkai Zhang and Maohua Lyu and Wensheng Gan and Philip S. Yu", title = "Totally-ordered Sequential Rules for Utility Maximization", journal = j-TKDD, volume = "18", number = "4", pages = "80:1--80:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3628450", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:57 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3628450", abstract = "High-utility sequential pattern mining (HUSPM) is a significant and valuable activity in knowledge discovery and data analytics with many real-world applications. In some cases, HUSPM can not provide an excellent measure to predict what will happen. High-utility \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov.
Data", articleno = "80", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yang:2024:LEE, author = "Yi Yang and Zhong-Qiu Zhao and Gongqing Wu and Xingrui Zhuo and Qing Liu and Quan Bai and Weihua Li", title = "A Lightweight, Effective, and Efficient Model for Label Aggregation in Crowdsourcing", journal = j-TKDD, volume = "18", number = "4", pages = "81:1--81:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3630102", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:57 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3630102", abstract = "Due to the presence of noise in crowdsourced labels, label aggregation (LA) has become a standard procedure for post-processing these labels. LA methods estimate true labels from crowdsourced labels by modeling worker quality. However, most existing LA \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "81", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Feng:2024:AAG, author = "Shengyu Feng and Baoyu Jing and Yada Zhu and Hanghang Tong", title = "{ArieL}: Adversarial Graph Contrastive Learning", journal = j-TKDD, volume = "18", number = "4", pages = "82:1--82:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3638054", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:57 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3638054", abstract = "Contrastive learning is an effective unsupervised method in graph representation learning. The key component of contrastive learning lies in the construction of positive and negative samples. 
Previous methods usually utilize the proximity of nodes in the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "82", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ding:2024:RGM, author = "Kaize Ding and Jianling Wang and Jundong Li and James Caverlee and Huan Liu", title = "Robust Graph Meta-Learning for Weakly Supervised Few-Shot Node Classification", journal = j-TKDD, volume = "18", number = "4", pages = "83:1--83:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3630260", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:57 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3630260", abstract = "Graph machine learning (Graph ML) models typically require abundant labeled instances to provide sufficient supervision signals, which is commonly infeasible in real-world scenarios since labeled data for newly emerged concepts (e.g., new categorizations \ldots{})", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "83", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhu:2024:ELD, author = "Weiyao Zhu and Ou Wu and Fengguang Su and Yingjun Deng", title = "Exploring the Learning Difficulty of Data: Theory and Measure", journal = j-TKDD, volume = "18", number = "4", pages = "84:1--84:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3636512", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:57 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3636512", abstract = "``Easy\slash hard sample'' is a popular parlance in machine learning. 
Learning difficulty of samples refers to how easy\slash hard a sample is during a learning procedure. An increasing need of measuring learning difficulty demonstrates its importance in machine \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "84", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chen:2024:TUL, author = "Wei Chen and Chao Huang and Yanwei Yu and Yongguo Jiang and Junyu Dong", title = "Trajectory-User Linking via Hierarchical Spatio-Temporal Attention Networks", journal = j-TKDD, volume = "18", number = "4", pages = "85:1--85:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3635718", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:57 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3635718", abstract = "Trajectory-User Linking (TUL) is crucial for human mobility modeling by linking different trajectories to users with the exploration of complex mobility patterns. Existing works mainly rely on the recurrent neural framework to encode the temporal \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov.
Data", articleno = "85", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2024:RGN, author = "Gang Liu and Eric Inae and Tengfei Luo and Meng Jiang", title = "Rationalizing Graph Neural Networks with Data Augmentation", journal = j-TKDD, volume = "18", number = "4", pages = "86:1--86:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3638781", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:57 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3638781", abstract = "Graph rationales are representative subgraph structures that best explain and support the graph neural network (GNN) predictions. Graph rationalization involves the joint identification of these subgraphs during GNN training, resulting in improved \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "86", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Long:2024:LGM, author = "Chao Long and Huanhuan Yuan and Junhua Fang and Xuefeng Xian and Guanfeng Liu and Victor S. Sheng and Pengpeng Zhao", title = "Learning Global and Multi-granularity Local Representation with {MLP} for Sequential Recommendation", journal = j-TKDD, volume = "18", number = "4", pages = "87:1--87:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3638562", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:57 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3638562", abstract = "Sequential recommendation aims to predict the next item of interest to users based on their historical behavior data. 
Usually, users' global and local preferences jointly affect the final recommendation result in different ways. Most existing works use \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "87", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Xu:2024:DLN, author = "Hui Xu and Liyao Xiang and Xiaoying Gan and Luoyi Fu and Xinbing Wang and Chenghu Zhou", title = "Distributional Learning for Network Alignment with Global Constraints", journal = j-TKDD, volume = "18", number = "4", pages = "88:1--88:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3638056", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:57 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3638056", abstract = "Network alignment, pairing corresponding nodes across the source and target networks, plays an important role in many data mining tasks. Extensive studies focus on learning node embeddings across different networks in a unified space. However, these \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "88", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2024:BGL, author = "Guixian Zhang and Shichao Zhang and Guan Yuan", title = "{Bayesian} Graph Local Extrema Convolution with Long-tail Strategy for Misinformation Detection", journal = j-TKDD, volume = "18", number = "4", pages = "89:1--89:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3639408", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:57 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3639408", abstract = "It has become a cardinal task to identify fake information (misinformation) on social media, because it has significantly harmed the government and the public. There are many spam bots maliciously retweeting misinformation. This study proposes an \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "89", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Shu:2024:MIL, author = "Senlin Shu and Deng-Bao Wang and Suqin Yuan and Hongxin Wei and Jiuchuan Jiang and Lei Feng and Min-Ling Zhang", title = "Multiple-instance Learning from Triplet Comparison Bags", journal = j-TKDD, volume = "18", number = "4", pages = "90:1--90:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3638776", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:57 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3638776", abstract = "Multiple-instance learning (MIL) solves the problem where training instances are grouped in bags, and a binary (positive or negative) label is provided for each bag. 
Most of the existing MIL studies need fully labeled bags for training an effective \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "90", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2024:PDP, author = "Yilin Wang and Sha Zhao and Shiwei Zhao and Runze Wu and Yuhong Xu and Jianrong Tao and Tangjie Lv and Shijian Li and Zhipeng Hu and Gang Pan", title = "{PU-Detector}: a {PU} Learning-based Framework for Real Money Trading Detection in {MMORPG}", journal = j-TKDD, volume = "18", number = "4", pages = "91:1--91:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3638561", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:57 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3638561", abstract = "Massive multiplayer online role-playing games (MMORPG) have been becoming one of the most popular and exciting online games. In recent years, a cheating phenomenon called real money trading (RMT) has arisen and damaged the fantasy world in many ways. RMT \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "91", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2024:DSA, author = "Yuhong Zhang and Jianqing Wu and Kui Yu and Xindong Wu", title = "Diverse Structure-Aware Relation Representation in Cross-Lingual Entity Alignment", journal = j-TKDD, volume = "18", number = "4", pages = "92:1--92:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3638778", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:57 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3638778", abstract = "Cross-lingual entity alignment (CLEA) aims to find equivalent entity pairs between knowledge graphs (KGs) in different languages. It is an important way to connect heterogeneous KGs and facilitate knowledge completion. Existing methods have found that \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "92", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Geeganage:2024:SET, author = "Dakshi Kapugama Geeganage and Yue Xu and Yuefeng Li", title = "A Semantics-enhanced Topic Modelling Technique: {Semantic-LDA}", journal = j-TKDD, volume = "18", number = "4", pages = "93:1--93:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3639409", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:57 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3639409", abstract = "Topic modelling is a beneficial technique used to discover latent topics in text collections. But to correctly understand the text content and generate a meaningful topic list, semantics are important. 
By ignoring semantics, that is, not attempting to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "93", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ren:2024:HPG, author = "Yuyang Ren and Haonan Zhang and Luoyi Fu and Shiyu Liang and Lei Zhou and Xinbing Wang and Xinde Cao and Fei Long and Chenghu Zhou", title = "{Hi-PART}: Going Beyond Graph Pooling with Hierarchical Partition Tree for Graph-Level Representation Learning", journal = j-TKDD, volume = "18", number = "4", pages = "94:1--94:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3636429", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:57 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3636429", abstract = "Graph pooling refers to the operation that maps a set of node representations into a compact form for graph-level representation learning. However, existing graph pooling methods are limited by the power of the Weisfeiler-Lehman (WL) test in the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "94", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Balalau:2024:FSM, author = "Oana Balalau and Francesco Bonchi and T-H. 
Hubert Chan and Francesco Gullo and Mauro Sozio and Hao Xie", title = "Finding Subgraphs with Maximum Total Density and Limited Overlap in Weighted Hypergraphs", journal = j-TKDD, volume = "18", number = "4", pages = "95:1--95:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3639410", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:57 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3639410", abstract = "Finding dense subgraphs in large (hyper)graphs is a key primitive in a variety of real-world application domains, encompassing social network analytics, event detection, biology, and finance. In most such applications, one typically aims at finding \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "95", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2024:CDA, author = "Jinpeng Li and Hang Yu and Zhenyu Zhang and Xiangfeng Luo and Shaorong Xie", title = "Concept Drift Adaptation by Exploiting Drift Type", journal = j-TKDD, volume = "18", number = "4", pages = "96:1--96:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3638777", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:57 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3638777", abstract = "Concept drift is a phenomenon where the distribution of data streams changes over time. When this happens, model predictions become less accurate. Hence, models built in the past need to be re-learned for the current data. Two design questions need to be \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "96", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Xiao:2024:NGN, author = "Feng Xiao and Youfa Liu and Jia Shao", title = "{NNC-GCN}: Neighbours-to-Neighbours Contrastive Graph Convolutional Network for Semi-Supervised Classification", journal = j-TKDD, volume = "18", number = "4", pages = "97:1--97:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3638780", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:57 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3638780", abstract = "Contrastive learning (CL) is a popular learning paradigm in deep learning, which uses contrastive principle to learn low-dimensional embeddings, and has been applied in Graph Neural Networks (GNNs) successfully. Existing works of contrastive multi-view \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "97", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Lai:2024:IAM, author = "Jinrong Lai and Tong Wang and Chuan Chen and Zibin Zheng", title = "Information-aware Multi-view Outlier Detection", journal = j-TKDD, volume = "18", number = "4", pages = "98:1--98:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3638354", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:57 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3638354", abstract = "With the development of multi-view learning, multi-view outlier detection has received increasing attention in recent years. 
However, the current research still faces two challenges: (1) The current research lacks theoretical analysis tools for multi-view \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "98", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Tu:2024:PFL, author = "Jingke Tu and Jiaming Huang and Lei Yang and Wanyu Lin", title = "Personalized Federated Learning with Layer-Wise Feature Transformation via Meta-Learning", journal = j-TKDD, volume = "18", number = "4", pages = "99:1--99:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3638252", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:57 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3638252", abstract = "Federated learning enables multiple clients to collaboratively learn machine learning models in a privacy-preserving manner. However, in real-world scenarios, a key challenge encountered in federated learning is the statistical heterogeneity among \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "99", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Khan:2024:HBP, author = "Mehak Khan and Gustavo B. M. 
Mello and Laurence Habib and Paal Engelstad and Anis Yazidi", title = "{HITS}-based Propagation Paradigm for Graph Neural Networks", journal = j-TKDD, volume = "18", number = "4", pages = "100:1--100:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3638779", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:57 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3638779", abstract = "In this article, we present a new propagation paradigm based on the principle of Hyperlink-Induced Topic Search (HITS) algorithm. The HITS algorithm utilizes the concept of a ``self-reinforcing'' relationship of authority-hub. Using HITS, the centrality of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "100", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ghahramanian:2024:NNE, author = "Pouya Ghahramanian and Sepehr Bakhshi and Hamed Bonab and Fazli Can", title = "A Novel Neural Ensemble Architecture for On-the-fly Classification of Evolving Text Streams", journal = j-TKDD, volume = "18", number = "4", pages = "101:1--101:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3639054", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:57 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3639054", abstract = "We study on-the-fly classification of evolving text streams in which the relation between the input data and target labels changes over time---i.e., ``concept drift.'' These variations decrease the model's performance, as predictions become less accurate over \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov.
Data", articleno = "101", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2024:DDG, author = "Ying Zhang and Zhiqiang Zhao and Zhuo Feng", title = "{diGRASS}: Directed Graph Spectral Sparsification via Spectrum-Preserving Symmetrization", journal = j-TKDD, volume = "18", number = "4", pages = "102:1--102:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3639568", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:57 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3639568", abstract = "Recent spectral graph sparsification research aims to construct ultra-sparse subgraphs for preserving the original graph spectral (structural) properties, such as the first few Laplacian eigenvalues and eigenvectors, which has led to the development of a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "102", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Pan:2024:EDD, author = "Yicheng Pan and Yifan Zhang and Xinrui Jiang and Meng Ma and Ping Wang", title = "{EffCause}: Discover Dynamic Causal Relationships Efficiently from Time-Series", journal = j-TKDD, volume = "18", number = "5", pages = "105:1--105:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3640818", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:59 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3640818", abstract = "Since the proposal of Granger causality, many researchers have followed the idea and developed extensions to the original algorithm. 
The classic Granger causality test aims to detect the existence of the static causal relationship. Notably, a fundamental \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "105", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yao:2024:ALG, author = "Kai-Lang Yao and Wu-Jun Li", title = "Asymmetric Learning for Graph Neural Network based Link Prediction", journal = j-TKDD, volume = "18", number = "5", pages = "106:1--106:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3640347", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:59 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3640347", abstract = "Link prediction is a fundamental problem in many graph-based applications, such as protein-protein interaction prediction. Recently, graph neural network (GNN) has been widely used for link prediction. However, existing GNN-based link prediction (GNN-LP) \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "106", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Guo:2024:MTL, author = "Xiaobo Guo and Mingming Ha and Xuewen Tao and Shaoshuai Li and Youru Li and Zhenfeng Zhu and Zhiyong Shen and Li Ma", title = "Multi-Task Learning with Sequential Dependence Toward Industrial Applications: a Systematic Formulation", journal = j-TKDD, volume = "18", number = "5", pages = "107:1--107:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3640468", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:59 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3640468", abstract = "Multi-task learning (MTL) is widely used in the online recommendation and financial services for multi-step conversion estimation, but current works often overlook the sequential dependence among tasks. In particular, sequential dependence multi-task \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "107", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2024:PAT, author = "Lei Zhang and Yong Liu and Zhiwei Zeng and Yiming Cao and Xingyu Wu and Yonghui Xu and Zhiqi Shen and Lizhen Cui", title = "Package Arrival Time Prediction via Knowledge Distillation Graph Neural Network", journal = j-TKDD, volume = "18", number = "5", pages = "108:1--108:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3643033", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:59 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3643033", abstract = "Accurately estimating packages' arrival time in e-commerce can enhance users' shopping experience and improve the placement rate of products. This problem is often formalized as an Origin-Destination (OD)-based ETA (i.e., estimated time of arrival) \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "108", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Kuo:2024:CAG, author = "Chuan-Wei Kuo and Bo-Yu Chen and Wen-Chih Peng and Chih-Chieh Hung and Hsin-Ning Su", title = "Correlation-aware Graph Data Augmentation with Implicit and Explicit Neighbors", journal = j-TKDD, volume = "18", number = "5", pages = "109:1--109:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3638057", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:59 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3638057", abstract = "In recent years, there has been a significant surge in commercial demand for citation graph-based tasks, such as patent analysis, social network analysis, and recommendation systems. Graph Neural Networks (GNNs) are widely used for these tasks due to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "109", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Duan:2024:ACT, author = "Mingxing Duan and Kenli Li and Weinan Zhang and Jiarui Qin and Bin Xiao", title = "Attacking Click-through Rate Predictors via Generating Realistic Fake Samples", journal = j-TKDD, volume = "18", number = "5", pages = "110:1--110:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3643685", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:59 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3643685", abstract = "How to construct imperceptible (realistic) fake samples is critical in adversarial attacks. 
Due to the sample feature diversity of a recommender system (containing both discrete and continuous features), traditional gradient-based adversarial attack \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "110", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Khodabandehlou:2024:FUF, author = "Samira Khodabandehlou and Alireza Hashemi Golpayegani", title = "{FiFrauD}: Unsupervised Financial Fraud Detection in Dynamic Graph Streams", journal = j-TKDD, volume = "18", number = "5", pages = "111:1--111:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3641857", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:59 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3641857", abstract = "Given a stream of financial transactions between traders in an e-market, how can we accurately detect fraudulent traders and suspicious behaviors in real time? Despite the efforts made in detecting these fraudsters, this field still faces serious \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "111", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Sun:2024:PEC, author = "Jianshan Sun and Suyuan Mei and Kun Yuan and Yuanchun Jiang and Jie Cao", title = "Prerequisite-Enhanced Category-Aware Graph Neural Networks for Course Recommendation", journal = j-TKDD, volume = "18", number = "5", pages = "112:1--112:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3643644", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:59 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3643644", abstract = "The rapid development of Massive Open Online Courses (MOOCs) platforms has created an urgent need for an efficient personalized course recommender system that can assist learners of all backgrounds and levels of knowledge in selecting appropriate courses. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "112", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Deng:2024:DGT, author = "Songgaojun Deng and Olivier Sprangers and Ming Li and Sebastian Schelter and Maarten de Rijke", title = "Domain Generalization in Time Series Forecasting", journal = j-TKDD, volume = "18", number = "5", pages = "113:1--113:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3643035", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:59 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3643035", abstract = "Domain generalization aims to design models that can effectively generalize to unseen target domains by learning from observed source domains. 
Domain generalization poses a significant challenge for time series data, due to varying data distributions and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "113", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Huang:2024:TTS, author = "Gengsen Huang and Wensheng Gan and Philip S. Yu", title = "{TaSPM}: Targeted Sequential Pattern Mining", journal = j-TKDD, volume = "18", number = "5", pages = "114:1--114:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3639827", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:59 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3639827", abstract = "Sequential pattern mining (SPM) is an important technique in the field of pattern mining, which has many applications in reality. Although many efficient SPM algorithms have been proposed, there are few studies that can focus on targeted tasks. Targeted \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "114", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhu:2024:NTS, author = "Yichen Zhu and Bo Jiang and Haiming Jin and Mengtian Zhang and Feng Gao and Jianqiang Huang and Tao Lin and Xinbing Wang", title = "Networked Time-series Prediction with Incomplete Data via Generative Adversarial Network", journal = j-TKDD, volume = "18", number = "5", pages = "115:1--115:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3643822", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:59 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3643822", abstract = "A networked time series (NETS) is a family of time series on a given graph, one for each node. It has a wide range of applications from intelligent transportation to environment monitoring to smart grid management. An important task in such applications \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "115", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Rong:2024:CRC, author = "Huan Rong and Minfeng Qian and Tinghuai Ma and Di Jin and Victor S. Sheng", title = "{CoBjeason}: Reasoning Covered Object in Image by Multi-Agent Collaboration Based on Informed Knowledge Graph", journal = j-TKDD, volume = "18", number = "5", pages = "116:1--116:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3643565", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:59 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3643565", abstract = "Object detection is a widely studied problem in existing works. 
However, in this paper, we turn to a more challenging problem of ``Covered Object Reasoning'', aimed at reasoning the category label of target object in the given image particularly when it has \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "116", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Pandey:2024:XDR, author = "Pradumn Kumar Pandey and Aikta Arya and Akrati Saxena", title = "{X-distribution}: Retraceable Power-law Exponent of Complex Networks", journal = j-TKDD, volume = "18", number = "5", pages = "117:1--117:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3639413", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:59 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3639413", abstract = "Network modeling has been explored extensively by means of theoretical analysis as well as numerical simulations for Network Reconstruction (NR). The network reconstruction problem requires the estimation of the power-law exponent ($ \gamma $) of a given input \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov.
Data", articleno = "117", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Moradnia:2024:SCP, author = "Sajedeh Moradnia and Mousa Golalizadeh", title = "Supervised Clustering of {Persian} Handwritten Images Using Regularization and Dimension Reduction Methods", journal = j-TKDD, volume = "18", number = "5", pages = "118:1--118:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3638060", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:59 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3638060", abstract = "Clustering, as a fundamental exploratory data technique, not only is used to discover patterns and structures in complex datasets but also is utilized to group variables in high-dimensional data analysis. Dimension reduction through clustering helps \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "118", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chen:2024:GTS, author = "Hongjie Chen and Hoda Eldardiry", title = "Graph Time-series Modeling in Deep Learning: a Survey", journal = j-TKDD, volume = "18", number = "5", pages = "119:1--119:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3638534", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:59 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3638534", abstract = "Time-series and graphs have been extensively studied for their ubiquitous existence in numerous domains. Both topics have been separately explored in the field of deep learning. 
For time-series modeling, recurrent neural networks or convolutional neural \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "119", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Poulakis:2024:SAM, author = "Yannis Poulakis and Christos Doulkeridis and Dimosthenis Kyriazis", title = "A Survey on {AutoML} Methods and Systems for Clustering", journal = j-TKDD, volume = "18", number = "5", pages = "120:1--120:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3643564", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:59 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3643564", abstract = "Automated Machine Learning (AutoML) aims to identify the best-performing machine learning algorithm along with its input parameters for a given dataset and a specific machine learning task. This is a challenging problem, as the process of finding the best \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "120", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Rodriguez-Gonzalez:2024:XFN, author = "Ansel Y. Rodr{\'\i}guez-Gonz{\'a}lez and Ram{\'o}n Aranda and Miguel {\'A}. 
{\'A}lvarez-Carmona and Angel D{\'\i}az-Pacheco and Rosa Mar{\'\i}a Valdovinos Rosas", title = "{X-FSPMiner}: a Novel Algorithm for Frequent Similar Pattern Mining", journal = j-TKDD, volume = "18", number = "5", pages = "121:1--121:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3643820", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:59 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3643820", abstract = "Frequent similar pattern mining (FSP mining) allows for finding frequent patterns hidden from the classical approach. However, the use of similarity functions implies more computational effort, necessitating the development of more efficient algorithms \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "121", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Luan:2024:MIL, author = "Tianxiang Luan and Shilin Gu and Xijia Tang and Wenzhang Zhuge and Chenping Hou", title = "Multi-Instance Learning with One Side Label Noise", journal = j-TKDD, volume = "18", number = "5", pages = "122:1--122:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3644076", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:59 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3644076", abstract = "Multi-instance Learning (MIL) is a popular learning paradigm arising from many real applications. It assigns a label to a set of instances, which is called a bag, and the bag's label is determined by the instances within it. A bag is positive if and only \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "122", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Qin:2024:MWP, author = "Wei Qin and Xiaowei Wang and Zhenzhen Hu and Lei Wang and Yunshi Lan and Richang Hong", title = "Math Word Problem Generation via Disentangled Memory Retrieval", journal = j-TKDD, volume = "18", number = "5", pages = "123:1--123:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3639569", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:59 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3639569", abstract = "The task of math word problem (MWP) generation, which generates an MWP given an equation and relevant topic words, has increasingly attracted researchers' attention. In this work, we introduce a simple memory retrieval module to search related training \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "123", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2024:VHL, author = "Haobo Wang and Cheng Peng and Hede Dong and Lei Feng and Weiwei Liu and Tianlei Hu and Ke Chen and Gang Chen", title = "On the Value of Head Labels in Multi-Label Text Classification", journal = j-TKDD, volume = "18", number = "5", pages = "124:1--124:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3643853", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:59 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3643853", abstract = "A formidable challenge in the multi-label text classification (MLTC) context is that the labels often exhibit a long-tailed distribution, which typically prevents deep MLTC models from obtaining satisfactory performance. To alleviate this problem, most \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "124", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Hu:2024:TDP, author = "Wentao Hu and Hui Fang", title = "Towards Differential Privacy in Sequential Recommendation: a Noisy Graph Neural Network Approach", journal = j-TKDD, volume = "18", number = "5", pages = "125:1--125:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3643821", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:59 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3643821", abstract = "With increasing frequency of high-profile privacy breaches in various online platforms, users are becoming more concerned about their privacy. 
And recommender system is the core component of online platforms for providing personalized service, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "125", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ni:2024:LCD, author = "Li Ni and Rui Ye and Wenjian Luo and Yiwen Zhang", title = "Local Community Detection in Multiple Private Networks", journal = j-TKDD, volume = "18", number = "5", pages = "126:1--126:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3644078", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:59 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3644078", abstract = "Individuals are often involved in multiple online social networks. Considering that owners of these networks are unwilling to share their networks, some global algorithms combine information from multiple networks to detect all communities in multiple \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "126", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Sui:2024:EDG, author = "Yongduo Sui and Wenyu Mao and Shuyao Wang and Xiang Wang and Jiancan Wu and Xiangnan He and Tat-Seng Chua", title = "Enhancing Out-of-distribution Generalization on Graphs via Causal Attention Learning", journal = j-TKDD, volume = "18", number = "5", pages = "127:1--127:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3644392", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:59 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3644392", abstract = "In graph classification, attention- and pooling-based graph neural networks (GNNs) predominate to extract salient features from the input graph and support the prediction. They mostly follow the paradigm of ``learning to attend,'' which maximizes the mutual \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "127", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Sun:2024:IML, author = "Kai Sun and Huajie Jiang and Yongli Hu and Baocai Yin", title = "Incorporating Multi-Level Sampling with Adaptive Aggregation for Inductive Knowledge Graph Completion", journal = j-TKDD, volume = "18", number = "5", pages = "128:1--128:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3644822", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:59 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3644822", abstract = "In recent years, Graph Neural Networks (GNNs) have achieved unprecedented success in handling graph-structured data, thereby driving the development of numerous GNN-oriented techniques for inductive knowledge graph completion (KGC). A key limitation of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "128", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yao:2024:TLP, author = "Rujing Yao and Ou Wu", title = "A Taxonomy for Learning with Perturbation and Algorithms", journal = j-TKDD, volume = "18", number = "5", pages = "129:1--129:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3644391", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:59 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3644391", abstract = "Weighting strategy prevails in machine learning. For example, a common approach in robust machine learning is to exert low weights on samples which are likely to be noisy or quite hard. 
This study summarizes another less-explored strategy, namely, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "129", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Han:2024:GBM, author = "Yuehui Han", title = "Generation-based Multi-view Contrast for Self-supervised Graph Representation Learning", journal = j-TKDD, volume = "18", number = "5", pages = "130:1--130:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3645095", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:59 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3645095", abstract = "Graph contrastive learning has made remarkable achievements in the self-supervised representation learning of graph-structured data. By employing perturbation function (i.e., perturbation on the nodes or edges of graph), most graph contrastive learning \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "130", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Singh:2024:MTH, author = "Kuldeep Singh and Bhaskar Biswas", title = "Mining Top-$k$ High On-shelf Utility Itemsets Using Novel Threshold Raising Strategies", journal = j-TKDD, volume = "18", number = "5", pages = "131:1--131:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3645115", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:59 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3645115", abstract = "High utility itemsets (HUIs) mining is an emerging area of data mining which discovers sets of items generating a high profit from transactional datasets. In recent years, several algorithms have been proposed for this task. However, most of them do not \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "131", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Pingi:2024:CGA, author = "Sharon Torao Pingi and Richi Nayak and Md Abul Bashar", title = "Conditional Generative Adversarial Network for Early Classification of Longitudinal Datasets Using an Imputation Approach", journal = j-TKDD, volume = "18", number = "5", pages = "132:1--132:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3644821", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:59 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3644821", abstract = "Early classification of longitudinal data remains an active area of research today. 
The complexity of these datasets and the high rates of missing data caused by irregular sampling present data-level challenges for the Early Longitudinal Data \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "132", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Dornaika:2024:SIS, author = "Fadi Dornaika and Zoulfikar Ibrahim and Alirezah Bosaghzadeh", title = "Scalable and Inductive Semi-supervised Classifier with Sample Weighting Based on Graph Topology", journal = j-TKDD, volume = "18", number = "5", pages = "133:1--133:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3643645", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:42:59 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3643645", abstract = "Recently, graph-based semi-supervised learning (GSSL) has garnered significant interest in the realms of machine learning and pattern recognition. Although some of the proposed methods have made some progress, there are still some shortcomings that need \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "133", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yang:2024:SBA, author = "Yang Yang and Feifei Wang and Enqiang Zhu and Fei Jiang and Wen Yao", title = "Social Behavior Analysis in Exclusive Enterprise Social Networks by {FastHAND}", journal = j-TKDD, volume = "18", number = "6", pages = "134:1--134:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3646552", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:43:01 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3646552", abstract = "There is an emerging trend in the Chinese automobile industries that automakers are introducing exclusive enterprise social networks (EESNs) to expand sales and provide after-sale services. The traditional online social networks (OSNs) and enterprise \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "134", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chen:2024:BTB, author = "Huiping Chen and Alessio Conte and Roberto Grossi and Grigorios Loukides and Solon P. Pissis and Michelle Sweering", title = "On Breaking Truss-based and Core-based Communities", journal = j-TKDD, volume = "18", number = "6", pages = "135:1--135:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3644077", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:43:01 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3644077", abstract = "We introduce the general problem of identifying a smallest edge subset of a given graph whose deletion makes the graph community-free. 
We consider this problem under two community notions that have attracted significant attention: $k$-truss and $k$-core. We \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "135", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2024:ISD, author = "Xuefei Li and Huiwei Zhou and Weihong Yao and Wenchu Li and Baojie Liu and Yingyu Lin", title = "Intricate Spatiotemporal Dependency Learning for Temporal Knowledge Graph Reasoning", journal = j-TKDD, volume = "18", number = "6", pages = "136:1--136:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3648366", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:43:01 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3648366", abstract = "Knowledge Graph (KG) reasoning has been an interesting topic in recent decades. Most current researches focus on predicting the missing facts for incomplete KG. Nevertheless, Temporal KG (TKG) reasoning, which is to forecast future facts, still faces with \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov.
Data", articleno = "136", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zheng:2024:PPA, author = "Yimei Zheng and Caiyan Jia", title = "{ProtoMGAE}: Prototype-Aware Masked Graph Auto-Encoder for Graph Representation Learning", journal = j-TKDD, volume = "18", number = "6", pages = "137:1--137:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3649143", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:43:01 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3649143", abstract = "Graph self-supervised representation learning has gained considerable attention and demonstrated remarkable efficacy in extracting meaningful representations from graphs, particularly in the absence of labeled data. Two representative methods in this \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "137", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chen:2024:FAG, author = "April Chen and Ryan A. Rossi and Namyong Park and Puja Trivedi and Yu Wang and Tong Yu and Sungchul Kim and Franck Dernoncourt and Nesreen K. Ahmed", title = "Fairness-Aware Graph Neural Networks: a Survey", journal = j-TKDD, volume = "18", number = "6", pages = "138:1--138:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3649142", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:43:01 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3649142", abstract = "Graph Neural Networks (GNNs) have become increasingly important due to their representational power and state-of-the-art predictive performance on many fundamental learning tasks. 
Despite this success, GNNs suffer from fairness issues that arise as a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "138", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2024:BSB, author = "Acong Zhang and Jincheng Huang and Ping Li and Kai Zhang", title = "Building Shortcuts between Distant Nodes with Biaffine Mapping for Graph Convolutional Networks", journal = j-TKDD, volume = "18", number = "6", pages = "139:1--139:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3650113", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:43:01 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3650113", abstract = "Multiple recent studies show a paradox in graph convolutional networks (GCNs)---that is, shallow architectures limit the capability of learning information from high-order neighbors, whereas deep architectures suffer from over-smoothing or over-squashing. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov.
Data", articleno = "139", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chen:2024:DGN, author = "Zhe Chen and Aixin Sun", title = "{DP-GCN}: Node Classification by Connectivity and Local Topology Structure on Real-World Network", journal = j-TKDD, volume = "18", number = "6", pages = "140:1--140:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3649460", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:43:01 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3649460", abstract = "Node classification is to predict the class label of a node by analyzing its properties and interactions in a network. We note that many existing solutions for graph-based node classification only consider node connectivity but not the node's local \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "140", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ali:2024:SSS, author = "Sarwan Ali and Muhammad Ahmad and Maham Anwer Beg and Imdad Ullah Khan and Safiullah Faizullah and Muhammad Asad Khan", title = "{SsAG}: Summarization and Sparsification of Attributed Graphs", journal = j-TKDD, volume = "18", number = "6", pages = "141:1--141:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3651619", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:43:01 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3651619", abstract = "Graph summarization has become integral for managing and analyzing large-scale graphs in diverse real-world applications, including social networks, biological networks, and communication networks. 
Existing methods for graph summarization often face \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "141", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Song:2024:MSM, author = "Derun Song and Enneng Yang and Guibing Guo and Li Shen and Linying Jiang and Xingwei Wang", title = "Multi-Scenario and Multi-Task Aware Feature Interaction for Recommendation System", journal = j-TKDD, volume = "18", number = "6", pages = "142:1--142:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3651312", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:43:01 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3651312", abstract = "Multi-scenario and multi-task recommendation can use various feedback behaviors of users in different scenarios to learn users' preferences and then make recommendations, which has attracted attention. However, the existing work ignores feature \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "142", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Connor:2024:NZN, author = "Richard Connor and Lucia Vadicamo", title = "{nSimplex Zen}: a Novel Dimensionality Reduction for {Euclidean} and {Hilbert} Spaces", journal = j-TKDD, volume = "18", number = "6", pages = "143:1--143:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3647642", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:43:01 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3647642", abstract = "Dimensionality reduction techniques map values from a high dimensional space to one with a lower dimension. The result is a space which requires less physical memory and has a faster distance calculation. These techniques are widely used where required \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "143", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ji:2024:CFM, author = "Taoran Ji and Nathan Self and Kaiqun Fu and Zhiqian Chen and Naren Ramakrishnan and Chang-Tien Lu", title = "Citation Forecasting with Multi-Context Attention-Aided Dependency Modeling", journal = j-TKDD, volume = "18", number = "6", pages = "144:1--144:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3649140", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:43:01 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3649140", abstract = "Forecasting citations of scientific patents and publications is a crucial task for understanding the evolution and development of technological domains and for foresight into emerging technologies. 
By construing citations as a time series, the task can be \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "144", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhou:2024:NEP, author = "Houquan Zhou and Shenghua Liu and Huawei Shen and Xueqi Cheng", title = "Node Embedding Preserving Graph Summarization", journal = j-TKDD, volume = "18", number = "6", pages = "145:1--145:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3649505", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:43:01 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3649505", abstract = "Graph summarization is a useful tool for analyzing large-scale graphs. Some works tried to preserve original node embeddings encoding rich structural information of nodes on the summary graph. However, their algorithms are designed heuristically and not \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "145", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Theocharidis:2024:ACA, author = "Konstantinos Theocharidis and Panagiotis Karras and Manolis Terrovitis and Spiros Skiadopoulos and Hady W. 
Lauw", title = "Adaptive Content-Aware Influence Maximization via Online Learning to Rank", journal = j-TKDD, volume = "18", number = "6", pages = "146:1--146:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3651987", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:43:01 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3651987", abstract = "How can we adapt the composition of a post over a series of rounds to make it more appealing in a social network? Techniques that progressively learn how to make a fixed post more influential over rounds have been studied in the context of the Influence \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "146", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Paterakis:2024:DWR, author = "George Paterakis and Stefanos Fafalios and Paulos Charonyktakis and Vassilis Christophides and Ioannis Tsamardinos", title = "Do We Really Need Imputation in {AutoML} Predictive Modeling?", journal = j-TKDD, volume = "18", number = "6", pages = "147:1--147:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3643643", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:43:01 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3643643", abstract = "Numerous real-world data contain missing values, while in contrast, most Machine Learning (ML) algorithms assume complete datasets. For this reason, several imputation algorithms have been proposed to predict and fill in the missing values. Given the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "147", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2024:DDL, author = "Chi Zhang and Linhao Cai and Meng Chen and Xiucheng Li and Gao Cong", title = "{DeepMeshCity}: a Deep Learning Model for Urban Grid Prediction", journal = j-TKDD, volume = "18", number = "6", pages = "148:1--148:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3652859", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:43:01 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3652859", abstract = "Urban grid prediction can be applied to many classic spatial-temporal prediction tasks such as air quality prediction, crowd density prediction, and traffic flow prediction, which is of great importance to smart city building. In light of its practical \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "148", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yang:2024:MSM, author = "Hongwei Yang and Hui He and Weizhe Zhang and Yan Wang and Lin Jing", title = "Multi-Source and Multi-modal Deep Network Embedding for Cross-Network Node Classification", journal = j-TKDD, volume = "18", number = "6", pages = "149:1--149:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3653304", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:43:01 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3653304", abstract = "In recent years, to address the issue of networked data sparsity in node classification tasks, cross-network node classification (CNNC) leverages the richer information from a source network to enhance the performance of node classification in the target \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "149", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Rong:2024:LGT, author = "Can Rong and Zhicheng Liu and Jingtao Ding and Yong Li", title = "Learning to Generate Temporal Origin-destination Flow Based-on Urban Regional Features and Traffic Information", journal = j-TKDD, volume = "18", number = "6", pages = "150:1--150:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3649141", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:43:01 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3649141", abstract = "Origin-destination (OD) flow contains population mobility information between every two regions in the city, which is of great value in urban planning and transportation management. Nevertheless, the collection of OD flow data is extremely difficult due \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "150", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Gu:2024:NJC, author = "Zhibin Gu and Songhe Feng and Zhendong Li and Jiazheng Yuan and Jun Liu", title = "{NOODLE}: Joint Cross-View Discrepancy Discovery and High-Order Correlation Detection for Multi-View Subspace Clustering", journal = j-TKDD, volume = "18", number = "6", pages = "151:1--151:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3653305", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:43:01 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3653305", abstract = "Benefiting from the effective exploration of the valuable topological pair-wise relationship of data points across multiple views, multi-view subspace clustering (MVSC) has received increasing attention in recent years. However, we observe that existing \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "151", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2024:DPF, author = "Yuhan Wang and Qing Xie and Mengzi Tang and Lin Li and Jingling Yuan and Yongjian Liu", title = "A Dual Perspective Framework of Knowledge-correlation for Cross-domain Recommendation", journal = j-TKDD, volume = "18", number = "6", pages = "152:1--152:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3652520", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:43:01 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", note = "See erratum \cite{tkdd}.", URL = "https://dl.acm.org/doi/10.1145/3652520", abstract = "Recommender System provides users with online services in a personalized way. 
The performance of traditional recommender systems may deteriorate because of problems such as cold-start and data sparsity. Cross-domain Recommendation System utilizes the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "152", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhao:2024:DER, author = "Chen Zhao and Feng Mi and Xintao Wu and Kai Jiang and Latifur Khan and Feng Chen", title = "Dynamic Environment Responsive Online Meta-Learning with Fairness Awareness", journal = j-TKDD, volume = "18", number = "6", pages = "153:1--153:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3648684", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:43:01 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3648684", abstract = "The fairness-aware online learning framework has emerged as a potent tool within the context of continuous lifelong learning. In this scenario, the learner's objective is to progressively acquire new tasks as they arrive over time, while also guaranteeing \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "153", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhao:2024:HCN, author = "Hong Zhao and Zhengyu Li and Wenwei He and Yan Zhao", title = "Hierarchical Convolutional Neural Network with Knowledge Complementation for Long-Tailed Classification", journal = j-TKDD, volume = "18", number = "6", pages = "154:1--154:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3653717", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:43:01 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3653717", abstract = "Existing methods based on transfer learning leverage auxiliary information to help tail generalization and improve the performance of the tail classes. However, they cannot fully exploit the relationships between auxiliary information and tail classes and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "154", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Sahebi:2024:MMP, author = "Sherry Sahebi and Mengfan Yao and Siqian Zhao and Reza Feyzi Behnagh", title = "{MoMENt}: Marked Point Processes with Memory-Enhanced Neural Networks for User Activity Modeling", journal = j-TKDD, volume = "18", number = "6", pages = "155:1--155:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3649504", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:43:01 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3649504", abstract = "Marked temporal point process models (MTPPs) aim to model event sequences and event markers (associated features) in continuous time. 
These models have been applied to various application domains where capturing event dynamics in continuous time is \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "155", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Choe:2024:RBT, author = "Minyoung Choe and Jaemin Yoo and Geon Lee and Woonsung Baek and U. Kang and Kijung Shin", title = "Representative and Back-In-Time Sampling from Real-world Hypergraphs", journal = j-TKDD, volume = "18", number = "6", pages = "156:1--156:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3653306", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:43:01 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3653306", abstract = "Graphs are widely used for representing pairwise interactions in complex systems. Since such real-world graphs are large and often evergrowing, sampling subgraphs is useful for various purposes, including simulation, visualization, stream processing, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "156", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Cui:2024:SSM, author = "Guosheng Cui and Ruxin Wang and Dan Wu and Ye Li", title = "Semi-supervised Multi-view Clustering based on {NMF} with Fusion Regularization", journal = j-TKDD, volume = "18", number = "6", pages = "157:1--157:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3653022", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:43:01 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3653022", abstract = "Multi-view clustering has attracted significant attention and application. Nonnegative matrix factorization is one popular feature of learning technology in pattern recognition. In recent years, many semi-supervised nonnegative matrix factorization \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "157", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Han:2024:DHH, author = "Jiadi Han and Yufei Tang and Qian Tao and Yuhan Xia and Liming Zhang", title = "Dual Homogeneity Hypergraph Motifs with Cross-view Contrastive Learning for Multiple Social Recommendations", journal = j-TKDD, volume = "18", number = "6", pages = "158:1--158:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3653976", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:43:01 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3653976", abstract = "Social relations are often used as auxiliary information to address data sparsity and cold-start issues in social recommendations. 
In the real world, social relations among users are complex and diverse. Widely used graph neural networks (GNNs) can only \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "158", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2024:FFF, author = "Wentai Zhang and HaiHong E and Haoran Luo and Mingzhi Sun", title = "{FulBM}: Fast Fully Batch Maintenance for Landmark-based $3$-hop Cover Labeling", journal = j-TKDD, volume = "18", number = "6", pages = "159:1--159:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3650035", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:43:01 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3650035", abstract = "Landmark-based 3-hop cover labeling is a category of approaches for shortest distance/path queries on large-scale complex networks. It pre-computes an index offline to accelerate the online distance/path query. Most real-world graphs undergo rapid changes \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov.
Data", articleno = "159", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yang:2024:HPL, author = "Jingfeng Yang and Hongye Jin and Ruixiang Tang and Xiaotian Han and Qizhang Feng and Haoming Jiang and Shaochen Zhong and Bing Yin and Xia Hu", title = "Harnessing the Power of {LLMs} in Practice: a Survey on {ChatGPT} and Beyond", journal = j-TKDD, volume = "18", number = "6", pages = "160:1--160:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3649506", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Apr 30 06:43:01 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3649506", abstract = "This article presents a comprehensive and practical guide for practitioners and end-users working with Large Language Models (LLMs) in their downstream Natural Language Processing (NLP) tasks. We provide discussions and insights into the usage of LLMs \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "160", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Cui:2024:ISP, author = "Jingyi Cui and Guangquan Xu and Jian Liu and Shicheng Feng and Jianli Wang and Hao Peng and Shihui Fu and Zhaohua Zheng and Xi Zheng and Shaoying Liu", title = "{ID-SR}: Privacy-Preserving Social Recommendation Based on Infinite Divisibility for Trustworthy {AI}", journal = j-TKDD, volume = "18", number = "7", pages = "161:1--161:??", month = aug, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3639412", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jun 22 11:37:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3639412", abstract = "Recommendation systems powered by artificial intelligence (AI) are widely used to improve user experience. However, AI inevitably raises privacy leakage and other security issues due to the utilization of extensive user data. Addressing these challenges \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "161", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yang:2024:MGC, author = "Xihong Yang and Yiqi Wang and Yue Liu and Yi Wen and Lingyuan Meng and Sihang Zhou and Xinwang Liu and En Zhu", title = "Mixed Graph Contrastive Network for Semi-supervised Node Classification", journal = j-TKDD, volume = "18", number = "7", pages = "162:1--162:??", month = aug, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3641549", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jun 22 11:37:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3641549", abstract = "Graph Neural Networks (GNNs) have achieved promising performance in semi-supervised node classification in recent years. However, the problem of insufficient supervision, together with representation collapse, largely limits the performance of the GNNs in \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "162", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ling:2024:FFS, author = "Zhaolong Ling and Enqi Xu and Peng Zhou and Liang Du and Kui Yu and Xindong Wu", title = "Fair Feature Selection: a Causal Perspective", journal = j-TKDD, volume = "18", number = "7", pages = "163:1--163:??", month = aug, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3643890", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jun 22 11:37:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3643890", abstract = "Fair feature selection for classification decision tasks has recently garnered significant attention from researchers. 
However, existing fair feature selection algorithms fall short of providing a full explanation of the causal relationship between \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "163", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Kose:2024:FFA, author = "O. Deniz Kose and Yanning Shen", title = "{FairGAT}: Fairness-Aware Graph Attention Networks", journal = j-TKDD, volume = "18", number = "7", pages = "164:1--164:??", month = aug, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3645096", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jun 22 11:37:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3645096", abstract = "Graphs can facilitate modeling various complex systems such as gene networks and power grids as well as analyzing the underlying relations within them. Learning over graphs has recently attracted increasing attention, particularly graph neural network \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "164", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2024:DSA, author = "Shenghao Liu and Yu Zhang and Lingzhi Yi and Xianjun Deng and Laurence T. 
Yang and Bang Wang", title = "Dual-Side Adversarial Learning Based Fair Recommendation for Sensitive Attribute Filtering", journal = j-TKDD, volume = "18", number = "7", pages = "165:1--165:??", month = aug, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3648683", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jun 22 11:37:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3648683", abstract = "With the development of recommendation algorithms, researchers are paying increasing attention to fairness issues such as user discrimination in recommendations. To address these issues, existing works often filter users' sensitive information that may \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "165", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ye:2024:BYC, author = "Tiandi Ye and Cen Chen and Yinggui Wang and Xiang Li and Ming Gao", title = "{BapFL}: You can Backdoor Personalized Federated Learning", journal = j-TKDD, volume = "18", number = "7", pages = "166:1--166:??", month = aug, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3649316", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jun 22 11:37:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3649316", abstract = "In federated learning (FL), malicious clients could manipulate the predictions of the trained model through backdoor attacks, posing a significant threat to the security of FL systems. Existing research primarily focuses on backdoor attacks and defenses \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "166", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2024:VWM, author = "Feifei Li and Yuanbin Wang and Oya Beyan and Mirjam Sch{\"o}neck and Liliana Lourenco Caldeira", title = "Voxel-Wise Medical Image Generalization for Eliminating Distribution Shift", journal = j-TKDD, volume = "18", number = "7", pages = "167:1--167:??", month = aug, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3643034", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jun 22 11:37:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3643034", abstract = "Currently, the medical field is witnessing an increase in the use of machine learning techniques. Supervised learning methods adopted in classification, prediction, and segmentation tasks for medical images always experience decreased performance when the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "167", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wen:2024:AIT, author = "Cheng Wen and Yuandao Cai and Bin Zhang and Jie Su and Zhiwu Xu and Dugang Liu and Shengchao Qin and Zhong Ming and Tian Cong", title = "Automatically Inspecting Thousands of Static Bug Warnings with Large Language Model: How Far Are We?", journal = j-TKDD, volume = "18", number = "7", pages = "168:1--168:??", month = aug, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3653718", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jun 22 11:37:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3653718", abstract = "Static analysis tools for capturing bugs and vulnerabilities in software programs are widely employed in practice, as they have the unique advantages of high coverage and independence from the execution environment. However, existing tools for analyzing \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "168", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wu:2024:ASM, author = "Chenwang Wu and Defu Lian and Yong Ge and Min Zhou and Enhong Chen", title = "Attacking Social Media via Behavior Poisoning", journal = j-TKDD, volume = "18", number = "7", pages = "169:1--169:??", month = aug, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3654673", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jun 22 11:37:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3654673", abstract = "Since social media such as Facebook and X (formerly known as Twitter) have permeated various aspects of daily life, people have strong incentives to influence information dissemination on these platforms and differentiate their content from the fierce \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "169", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Brzezinski:2024:PFM, author = "Dariusz Brzezinski and Julia Stachowiak and Jerzy Stefanowski and Izabela Szczech and Robert Susmaga and Sofya Aksenyuk and Uladzimir Ivashka and Oleksandr Yasinskyi", title = "Properties of Fairness Measures in the Context of Varying Class Imbalance and Protected Group Ratios", journal = j-TKDD, volume = "18", number = "7", pages = "170:1--170:??", month = aug, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3654659", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jun 22 11:37:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3654659", abstract = "Society is increasingly relying on predictive models in fields like criminal justice, credit risk management, and hiring. To prevent such automated systems from discriminating against people belonging to certain groups, fairness measures have become a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "170", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chen:2024:TRT, author = "Yunkai Chen and Qimeng Wang and Shiwei Wu and Yan Gao and Tong Xu and Yao Hu", title = "{TOMGPT}: Reliable Text-Only Training Approach for Cost-Effective Multi-modal Large Language Model", journal = j-TKDD, volume = "18", number = "7", pages = "171:1--171:??", month = aug, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3654674", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jun 22 11:37:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3654674", abstract = "Multi-modal large language models (MLLMs), such as GPT-4, exhibit great comprehension capabilities on human instruction, as well as zero-shot ability on new downstream multi-modal tasks. To integrate the different modalities within a unified embedding \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "171", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2024:FTT, author = "Jiaxin Zhang and Yiqi Wang and Xihong Yang and En Zhu", title = "A Fully Test-time Training Framework for Semi-supervised Node Classification on Out-of-Distribution Graphs", journal = j-TKDD, volume = "18", number = "7", pages = "172:1--172:??", month = aug, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3649507", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jun 22 11:37:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3649507", abstract = "Graph neural networks (GNNs) have shown great potential in representation learning for various graph tasks. 
However, the distribution shift between the training and test sets poses a challenge to the efficiency of GNNs. To address this challenge, HomoTTT \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "172", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhu:2024:STR, author = "Ronghang Zhu and Dongliang Guo and Daiqing Qi and Zhixuan Chu and Xiang Yu and Sheng Li", title = "A Survey of Trustworthy Representation Learning Across Domains", journal = j-TKDD, volume = "18", number = "7", pages = "173:1--173:??", month = aug, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3657301", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jun 22 11:37:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3657301", abstract = "As AI systems have obtained significant performance to be deployed widely in our daily lives and human society, people both enjoy the benefits brought by these technologies and suffer many social issues induced by these systems. To make AI systems good \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "173", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2024:SAS, author = "Mengyao Li and Zhiyong Li and Zhibang Yang and Xu Zhou and Yifan Li and Ziyan Wu and Lingzhao Kong and Ke Nai", title = "{SA2E-AD}: a Stacked Attention Autoencoder for Anomaly Detection in Multivariate Time Series", journal = j-TKDD, volume = "18", number = "7", pages = "174:1--174:??", month = aug, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3653677", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jun 22 11:37:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3653677", abstract = "Anomaly detection for multivariate time series is an essential task in the modern industrial field. Although several methods have been developed for anomaly detection, they usually fail to effectively exploit the metrical-temporal correlation and the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "174", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhuang:2024:TRR, author = "Wen-Ming Zhuang and Chih-Yao Chen and Cheng-Te Li", title = "Towards Robust Rumor Detection with Graph Contrastive and Curriculum Learning", journal = j-TKDD, volume = "18", number = "7", pages = "175:1--175:??", month = aug, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3653023", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jun 22 11:37:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3653023", abstract = "Establishing a robust rumor detection model is vital in safeguarding the veracity of information on social media platforms. 
However, existing approaches to stopping rumor from spreading rely on abundant and clean training data, which is rarely available \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "175", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2024:TFL, author = "Lei Zhang and Lele Fu and Chen Liu and Zhao Yang and Jinghua Yang and Zibin Zheng and Chuan Chen", title = "Toward Few-Label Vertical Federated Learning", journal = j-TKDD, volume = "18", number = "7", pages = "176:1--176:??", month = aug, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3656344", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jun 22 11:37:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3656344", abstract = "Federated Learning (FL) provides a novel paradigm for privacy-preserving machine learning, enabling multiple clients to collaborate on model training without sharing private data. To handle multi-source heterogeneous data, Vertical Federated Learning (VFL) \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "176", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2024:LIG, author = "Xinru Liu and Yongjing Hao and Lei Zhao and Guanfeng Liu and Victor S.
Sheng and Pengpeng Zhao", title = "{LMACL}: Improving Graph Collaborative Filtering with Learnable Model Augmentation Contrastive Learning", journal = j-TKDD, volume = "18", number = "7", pages = "177:1--177:??", month = aug, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3657302", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jun 22 11:37:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3657302", abstract = "Graph collaborative filtering (GCF) has achieved exciting recommendation performance with its ability to aggregate high-order graph structure information. Recently, contrastive learning (CL) has been incorporated into GCF to alleviate data sparsity and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "177", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Schlieper:2024:EUO, author = "Philipp Schlieper and Hermann Luft and Kai Klede and Christoph Strohmeyer and Bjoern Eskofier and Dario Zanca", title = "Enhancing Unsupervised Outlier Model Selection: a Study on {IREOS} Algorithms", journal = j-TKDD, volume = "18", number = "7", pages = "178:1--178:??", month = aug, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3653719", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jun 22 11:37:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3653719", abstract = "Outlier detection stands as a critical cornerstone in the field of data mining, with a wide range of applications spanning from fraud detection to network security. However, real-world scenarios often lack labeled data for training, necessitating \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "178", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Sui:2024:CAS, author = "Hongjie Sui and Huan Yan and Tianyi Zheng and Wenzhen Huang and Yunlin Zhuang and Yong Li", title = "Congestion-aware Spatio-Temporal Graph Convolutional Network-based {$ A* $} Search Algorithm for Fastest Route Search", journal = j-TKDD, volume = "18", number = "7", pages = "179:1--179:??", month = aug, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3657640", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jun 22 11:37:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3657640", abstract = "The fastest route search, which is to find a path with the shortest travel time when the user initiates a query, has become one of the most important services in many map applications. To enhance the user experience of travel, it is necessary to achieve \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "179", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Bicego:2024:CRF, author = "Manuele Bicego and Ferdinando Cicalese", title = "Computing Random Forest-Distances in the Presence of Missing Data", journal = j-TKDD, volume = "18", number = "7", pages = "180:1--180:??", month = aug, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3656345", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jun 22 11:37:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3656345", abstract = "In this article, we study the problem of computing Random Forest-distances in the presence of missing data.
We present a general framework which avoids pre-imputation and uses in an agnostic way the information contained in the input points. We centre our \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "180", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Deng:2024:DPL, author = "Jiayi Deng and Danyang Huang and Bo Zhang", title = "Distributed Pseudo-Likelihood Method for Community Detection in Large-Scale Networks", journal = j-TKDD, volume = "18", number = "7", pages = "181:1--181:??", month = aug, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3657300", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jun 22 11:37:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3657300", abstract = "This paper proposes a distributed pseudo-likelihood method (DPL) to conveniently identify the community structure of large-scale networks. Specifically, we first propose a block-wise splitting method to divide large-scale network data into several \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "181", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Xia:2024:FEF, author = "Bolun (Namir) Xia and Vipula Rawte and Aparna Gupta and Mohammed Zaki", title = "{FETILDA}: Evaluation Framework for Effective Representations of Long Financial Documents", journal = j-TKDD, volume = "18", number = "7", pages = "182:1--182:??", month = aug, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3657299", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jun 22 11:37:44 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3657299", abstract = "In the financial sphere, there is a wealth of accumulated unstructured financial data, such as the textual disclosure documents that companies submit on a regular basis to regulatory agencies, such as the Securities and Exchange Commission. These \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "182", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yu:2024:IGC, author = "Penghang Yu and Bing-Kun Bao and Zhiyi Tan and Guanming Lu", title = "Improving Graph Collaborative Filtering with Directional Behavior Enhanced Contrastive Learning", journal = j-TKDD, volume = "18", number = "8", pages = "183:1--183:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3663574", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Aug 28 06:29:41 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3663574", abstract = "Graph Collaborative Filtering is a widely adopted approach for recommendation, which captures similar behavior features through Graph Neural Network (GNN). 
Recently, Contrastive Learning (CL) has been demonstrated as an effective method to enhance the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "183", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Lu:2024:FNS, author = "Fengcheng Lu and Michael Kwok-Po Ng", title = "{FastHGNN}: a New Sampling Technique for Learning with Hypergraph Neural Networks", journal = j-TKDD, volume = "18", number = "8", pages = "184:1--184:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3663670", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Aug 28 06:29:41 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3663670", abstract = "Hypergraphs can represent higher-order relations among objects. Traditional hypergraph neural networks involve node-edge-node transform, leading to high computational cost and timing. The main aim of this article is to propose a new sampling technique for \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "184", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Huang:2024:EEA, author = "Yinqiu Huang and Min Gao and Kai Shu and Chenghua Lin and Jia Wang and Wei Zhou", title = "{EML}: Emotion-Aware Meta Learning for Cross-Event False Information Detection", journal = j-TKDD, volume = "18", number = "8", pages = "185:1--185:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3661485", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Aug 28 06:29:41 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3661485", abstract = "Modern social media's development has dramatically changed how people obtain information. However, the wide dissemination of various false information has severe detrimental effects. Accordingly, many deep learning-based methods have been proposed to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "185", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Qian:2024:LAL, author = "Yu-Yang Qian and Zhen-Yu Zhang and Peng Zhao and Zhi-Hua Zhou", title = "Learning with Asynchronous Labels", journal = j-TKDD, volume = "18", number = "8", pages = "186:1--186:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3662186", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Aug 28 06:29:41 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3662186", abstract = "Learning with data streams has attracted much attention in recent decades. Conventional approaches typically assume that the feature and label of a data item can be timely observed at each round. 
In many real-world tasks, however, it often occurs that \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "186", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{He:2024:VAD, author = "Yifan He and Yatao Bian and Xi Ding and Bingzhe Wu and Jihong Guan and Ji Zhang and Shuigeng Zhou", title = "Variate Associated Domain Adaptation for Unsupervised Multivariate Time Series Anomaly Detection", journal = j-TKDD, volume = "18", number = "8", pages = "187:1--187:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3663573", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Aug 28 06:29:41 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3663573", abstract = "Multivariate Time Series Anomaly Detection (MTS-AD) is crucial for the effective management and maintenance of devices in complex systems, such as server clusters, spacecrafts, and financial systems, and so on. However, upgrade or cross-platform \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "187", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liang:2024:DCG, author = "Tianhai Liang and Qiangqiang Shen and Shuqin Wang and Yongyong Chen and Guokai Zhang and Junxin Chen", title = "Data Completion-Guided Unified Graph Learning for Incomplete Multi-View Clustering", journal = j-TKDD, volume = "18", number = "8", pages = "188:1--188:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3664290", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Aug 28 06:29:41 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3664290", abstract = "Due to its heterogeneous property, multi-view data has been widely concerned over single-view data for performance improvement. Unfortunately, some instances may be with partially available information because of some uncontrollable factors, for which the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "188", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2024:MUT, author = "Tong Li and Shuodi Hui and Shiyuan Zhang and Huandong Wang and Yuheng Zhang and Pan Hui and Depeng Jin and Yong Li", title = "Mobile User Traffic Generation Via Multi-Scale Hierarchical {GAN}", journal = j-TKDD, volume = "18", number = "8", pages = "189:1--189:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3664655", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Aug 28 06:29:41 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3664655", abstract = "Mobile user traffic facilitates diverse applications, including network planning and optimization, whereas large-scale mobile user traffic is hardly available due to privacy concerns. One alternative solution is to generate mobile user traffic data for \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "189", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{GomesDeOliveiraMartinsNicola:2024:IRM, author = "Victor {Gomes De Oliveira Martins Nicola} and Karina {Valdivia Delgado} and Marcelo de Souza Lauretto", title = "Imbalance-Robust Multi-Label Self-Adjusting {kNN}", journal = j-TKDD, volume = "18", number = "8", pages = "190:1--190:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3663575", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Aug 28 06:29:41 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3663575", abstract = "In the task of multi-label classification in data streams, instances arriving in real-time need to be associated with multiple labels simultaneously. Various methods based on the k Nearest Neighbors algorithm have been proposed to address this task. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "190", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2024:MOR, author = "Xiangyu Li and Hua Wang", title = "On Mean-Optimal Robust Linear Discriminant Analysis", journal = j-TKDD, volume = "18", number = "8", pages = "191:1--191:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3665500", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Aug 28 06:29:41 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3665500", abstract = "Linear discriminant analysis (LDA) is widely used for dimensionality reduction under supervised learning settings. 
Traditional LDA objective aims to minimize the ratio of the squared Euclidean distances that may not perform optimally on noisy datasets. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "191", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ekle:2024:ADD, author = "Ocheme Anthony Ekle and William Eberle", title = "Anomaly Detection in Dynamic Graphs: a Comprehensive Survey", journal = j-TKDD, volume = "18", number = "8", pages = "192:1--192:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3669906", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Aug 28 06:29:41 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3669906", abstract = "This survey article presents a comprehensive and conceptual overview of anomaly detection (AD) using dynamic graphs. We focus on existing graph-based AD techniques and their applications to dynamic networks. The contributions of this survey article \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "192", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Xi:2024:UOR, author = "Yunjia Xi and Weiwen Liu and Xinyi Dai and Ruiming Tang and Qing Liu and Weinan Zhang and Yong Yu", title = "Utility-Oriented Reranking with Counterfactual Context", journal = j-TKDD, volume = "18", number = "8", pages = "193:1--193:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3671004", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Aug 28 06:29:41 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3671004", abstract = "As a critical task for large-scale commercial recommender systems, reranking rearranges items in the initial ranking lists from the previous ranking stage to better meet users' demands. Foundational work in reranking has shown the potential of improving \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "193", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yin:2024:CVK, author = "Jiao Yin and Wei Hong and Hua Wang and Jinli Cao and Yuan Miao and Yanchun Zhang", title = "A Compact Vulnerability Knowledge Graph for Risk Assessment", journal = j-TKDD, volume = "18", number = "8", pages = "194:1--194:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3671005", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Aug 28 06:29:41 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3671005", abstract = "Software vulnerabilities, also known as flaws, bugs or weaknesses, are common in modern information systems, putting critical data of organizations and individuals at cyber risk. 
Due to the scarcity of resources, initial risk assessment is becoming a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "194", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yang:2024:DPD, author = "Lingkai Yang and Sally McClean and Mark Donnelly and Kashaf Khan and Kevin Burke", title = "Detecting Process Duration Drift Using Gamma Mixture Models in a Left-Truncated and Right-Censored Environment", journal = j-TKDD, volume = "18", number = "8", pages = "195:1--195:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3669942", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Aug 28 06:29:41 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3669942", abstract = "Within the realm of business context, process duration signifies time spent by customers between successive activities. This temporal perspective offers important insight to customer behavior, highlighting potential bottlenecks, and influencing business \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "195", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2024:ADA, author = "Yang Zhang and Ting Yu and Shengqiang Chi and Zhen Wang and Yue Gao and Ji Zhang and Tianshu Zhou", title = "Attribute Diversity Aware Community Detection on Attributed Graphs Using Three-View Graph Attention Neural Networks", journal = j-TKDD, volume = "18", number = "8", pages = "196:1--196:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3672081", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Aug 28 06:29:41 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3672081", abstract = "Community detection is a fundamental yet important task for characterizing and understanding the structure of attributed graphs. Existing methods mainly focus on the structural tightness and attribute similarity among nodes in a community. However, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "196", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2024:HMP, author = "Munan Li and Kai Liu and Hongbo Liu and Zheng Zhao and Tomas E. Ward and Xindong Wu", title = "Heterogeneous Meta-Path Graph Learning for Higher-Order Social Recommendation", journal = j-TKDD, volume = "18", number = "8", pages = "197:1--197:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3673658", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Aug 28 06:29:41 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3673658", abstract = "Recommendation systems have become an indispensable part of daily life. 
Social recommendation systems, which utilize social relationships and past behaviors to infer users' preferences, have gained popularity in recent years. Exploring the inherent \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "197", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liang:2024:DUP, author = "Yuliang Liang and Enneng Yang and Guibing Guo and Wei Cai and Linying Jiang and Xingwei Wang", title = "Deconfounding User Preference in Recommendation Systems through Implicit and Explicit Feedback", journal = j-TKDD, volume = "18", number = "8", pages = "198:1--198:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3673762", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Aug 28 06:29:41 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3673762", abstract = "Recommender systems are influenced by many confounding factors (i.e., confounders) which result in various biases (e.g., popularity biases) and inaccurate user preference. Existing approaches try to eliminate these biases by inference with causal graphs. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "198", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhao:2024:LIT, author = "Ziyu Zhao and Yuqi Bai and Ruoxuan Xiong and Qingyu Cao and Chao Ma and Ning Jiang and Fei Wu and Kun Kuang", title = "Learning Individual Treatment Effects under Heterogeneous Interference in Networks", journal = j-TKDD, volume = "18", number = "8", pages = "199:1--199:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3673761", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Aug 28 06:29:41 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3673761", abstract = "Estimating individual treatment effects in networked observational data is a crucial and increasingly recognized problem. One major challenge of this problem is violating the stable unit treatment value assumption (SUTVA), which posits that a unit's \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "199", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ma:2024:SPS, author = "Fei Ma and Ping Wang", title = "Structural Properties on Scale-Free Tree Network with an Ultra-Large Diameter", journal = j-TKDD, volume = "18", number = "8", pages = "200:1--200:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3674146", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Aug 28 06:29:41 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3674146", abstract = "Scale-free networks are prevalently observed in a great variety of complex systems, which triggers various researches relevant to networked models of such type. 
In this work, we propose a family of growth tree networks $\mathcal{T}_t$, which turn \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "200", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2024:QNN, author = "Jiaye Li and Jinjing Shi and Jian Zhang and Yuhu Lu and Qin Li and Chunlin Yu and Shichao Zhang", title = "Quantum Nearest Neighbor Collaborative Filtering Algorithm for Recommendation System", journal = j-TKDD, volume = "18", number = "8", pages = "201:1--201:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3674982", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Aug 28 06:29:41 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3674982", abstract = "Recommendation has become especially crucial during the COVID-19 pandemic as a significant number of people rely on online shopping from home. Existing recommendation algorithms, designed to address issues like cold start and data sparsity, often overlook \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "201", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ding:2024:TFD, author = "Shifei Ding and Benyu Wu and Ling Ding and Xiao Xu and Lili Guo and Hongmei Liao and Xindong Wu", title = "Towards Faster Deep Graph Clustering via Efficient Graph Auto-Encoder", journal = j-TKDD, volume = "18", number = "8", pages = "202:1--202:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3674983", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Aug 28 06:29:41 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3674983", abstract = "Deep graph clustering (DGC) has been a promising method for clustering graph data in recent years. However, existing research primarily focuses on optimizing clustering outcomes by improving the quality of embedded representations, resulting in slow-speed \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "202", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Sun:2024:TDA, author = "Mingchen Sun and Yingji Li and Ying Wang and Xin Wang", title = "Towards Domain-Aware Stable Meta Learning for Out-of-Distribution Generalization", journal = j-TKDD, volume = "18", number = "8", pages = "203:1--203:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3676558", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Aug 28 06:29:41 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3676558", abstract = "Deep learning models are often trained on datasets that are limited in size and distribution, which may not fully represent the entire range of data encountered in practice. 
Thus, making deep learning models generalize to out-of-distribution data has \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "203", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wei:2024:NER, author = "Xiangyu Wei and Wei Wang and Chongsheng Zhang and Weiping Ding and Bin Wang and Yaguan Qian and Zhen Han and Chunhua Su", title = "Neighbor-Enhanced Representation Learning for Link Prediction in Dynamic Heterogeneous Attributed Networks", journal = j-TKDD, volume = "18", number = "8", pages = "204:1--204:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3676559", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Aug 28 06:29:41 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3676559", abstract = "Dynamic link prediction aims to predict future connections among unconnected nodes in a network. It can be applied for friend recommendations, link completion, and other tasks. Network representation learning algorithms have demonstrated considerable \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "204", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2024:MMI, author = "Hongyu Li and Lefei Zhang and Kehua Su and Wei Yu", title = "{MICCF}: a Mutual Information Constrained Clustering Framework for Learning Clustering-Oriented Feature Representations", journal = j-TKDD, volume = "18", number = "8", pages = "205:1--205:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3672402", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Aug 28 06:29:41 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3672402", abstract = "Deep clustering is a crucial task in machine learning and data mining that focuses on acquiring feature representations conducive to clustering. Previous research relies on self-supervised representation learning for general feature representations, such \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "205", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liao:2024:APT, author = "Chengwu Liao and Chao Chen and Wanyi Zhang and Suiming Guo and Chao Liu", title = "{AGENDA}: Predicting Trip Purposes with A New Graph Embedding Network and Active Domain Adaptation", journal = j-TKDD, volume = "18", number = "8", pages = "206:1--206:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3677020", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Aug 28 06:29:41 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3677020", abstract = "Trip purpose is a meaningful aspect of travel behaviour for the understanding of urban mobility. 
However, it is non-trivial to automatically obtain trip purposes. On one hand, trip purposes are naturally diverse and complicated, but the available \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "206", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Peng:2024:MSS, author = "Bo Peng and Ziqi Chen and Srinivasan Parthasarathy and Xia Ning", title = "Modeling Sequences as Star Graphs to Address Over-Smoothing in Self-Attentive Sequential Recommendation", journal = j-TKDD, volume = "18", number = "8", pages = "207:1--207:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3676560", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Aug 28 06:29:41 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3676560", abstract = "Self-attention (SA) mechanisms have been widely used in developing sequential recommendation (SR) methods, and demonstrated state-of-the-art performance. However, in this article, we show that self-attentive SR methods substantially suffer from the over-smoothing \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "207", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2024:FFL, author = "Chunnan Wang and Xiangyu Shi and Hongzhi Wang", title = "Fair Federated Learning with Multi-Objective Hyperparameter Optimization", journal = j-TKDD, volume = "18", number = "8", pages = "208:1--208:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3676968", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Aug 28 06:29:41 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3676968", abstract = "Federated learning (FL) is an attractive paradigm for privacy-aware distributed machine learning, which enables clients to collaboratively learn a global model without sharing clients' data. Recently, many strategies have been proposed to improve the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "208", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhou:2024:CED, author = "Peng Zhou and Yufeng Guo and Haoran Yu and Yuanting Yan and Yanping Zhang and Xindong Wu", title = "Concept Evolution Detecting over Feature Streams", journal = j-TKDD, volume = "18", number = "8", pages = "209:1--209:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3678012", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Aug 28 06:29:41 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3678012", abstract = "The explosion of data volume has gradually transformed big data processing from the static batch mode to the online streaming model. 
Streaming data can be divided into instance streams (feature space remains fixed while instances increase over time), \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "209", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Meng:2024:SIB, author = "Siyuan Meng and Jie Zhou and Xuxin Chen and Yufei Liu and Fengyuan Lu and Xinli Huang", title = "Structure-Information-Based Reasoning over the Knowledge Graph: a Survey of Methods and Applications", journal = j-TKDD, volume = "18", number = "8", pages = "210:1--210:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3671148", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Wed Aug 28 06:29:41 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3671148", abstract = "The knowledge graph (KG) is an efficient form of knowledge organization and expression, providing prior knowledge support for various downstream tasks, and has received extensive attention in natural language processing. However, existing large-scale KGs \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "210", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Fu:2024:SCM, author = "Lele Fu and Sheng Huang and Lei Zhang and Jinghua Yang and Zibin Zheng and Chuanfu Zhang and Chuan Chen", title = "Subspace-Contrastive Multi-View Clustering", journal = j-TKDD, volume = "18", number = "9", pages = "211:1--211:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3674839", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Nov 21 05:55:25 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3674839", abstract = "Most multi-view clustering methods based on shallow models are limited in sound nonlinear information perception capability, or fail to effectively exploit complementary information hidden in different views. To tackle these issues, we propose a novel \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "211", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2024:MGE, author = "Yonghao Liu and Mengyu Li and Ximing Li and Lan Huang and Fausto Giunchiglia and Yanchun Liang and Xiaoyue Feng and Renchu Guan", title = "{Meta-GPS++}: Enhancing Graph Meta-Learning with Contrastive Learning and Self-Training", journal = j-TKDD, volume = "18", number = "9", pages = "212:1--212:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3679018", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Nov 21 05:55:25 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3679018", abstract = "Node classification is an essential problem in graph learning. 
However, many models typically obtain unsatisfactory performance when applied to few-shot scenarios. Some studies have attempted to combine meta-learning with graph neural networks to solve \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "212", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Kumar:2024:STP, author = "Rahul Kumar and Jo{\~a}o Mendes-Moreira and Joydeep Chandra", title = "Spatio-Temporal Parallel Transformer Based Model for Traffic Prediction", journal = j-TKDD, volume = "18", number = "9", pages = "213:1--213:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3679017", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Nov 21 05:55:25 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3679017", abstract = "Traffic forecasting problems involve jointly modeling the non-linear spatio-temporal dependencies at different scales. While graph neural network models have been effectively used to capture the non-linear spatial dependencies, capturing the dynamic \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "213", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Voevodski:2024:LSC, author = "Konstantin Voevodski", title = "Large-Scale {$K$}-Clustering", journal = j-TKDD, volume = "18", number = "9", pages = "214:1--214:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3674508", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Nov 21 05:55:25 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3674508", abstract = "Large-scale learning algorithms are essential for modern data collections that may have billions of data points. Here, we study the design of parallel $k$-clustering algorithms, which include the $k$-median, $k$-medoids, and $k$-means \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "214", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yu:2024:MPR, author = "Qiaohong Yu and Huandong Wang and Yu Liu and Depeng Jin and Yong Li and Lin Zhu and Junlan Feng", title = "Mobility Prediction via Rule-enhanced Knowledge Graph", journal = j-TKDD, volume = "18", number = "9", pages = "215:1--215:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3677019", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Nov 21 05:55:25 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3677019", abstract = "With the rapid development of location acquisition technologies, massive mobile trajectories have been collected and made available to us, which support a fantastic way of understanding and modeling individuals' mobility. 
However, existing data-driven \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "215", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yang:2024:SAP, author = "Jiacheng Yang and Miaoxin Chen and Cao Liu and Boqi Dai and Hai-Tao Zheng and Hui Wang and Rui Xie and Hong-Gee Kim", title = "A Segment Augmentation and Prediction Consistency Framework for Multi-label Unknown Intent Detection", journal = j-TKDD, volume = "18", number = "9", pages = "216:1--216:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3680286", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Nov 21 05:55:25 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3680286", abstract = "Multi-label unknown intent detection is a challenging task where each utterance may contain not only multiple known but also unknown intents. To tackle this challenge, pioneers proposed to predict the intent number of the utterance first, then compare it \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "216", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Burkhardt:2024:TC, author = "Paul Burkhardt", title = "Triangle Centrality", journal = j-TKDD, volume = "18", number = "9", pages = "217:1--217:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3685677", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Nov 21 05:55:25 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3685677", abstract = "Triangle centrality is introduced for finding important vertices in a graph based on the concentration of triangles surrounding each vertex. It has the distinct feature of allowing a vertex to be central if it is in many triangles or none at all. Given a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "217", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2024:BSA, author = "Qianru Zhang and Zheng Wang and Cheng Long and Siu-Ming Yiu", title = "Billiards Sports Analytics: Datasets and Tasks", journal = j-TKDD, volume = "18", number = "9", pages = "218:1--218:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3686804", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Nov 21 05:55:25 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3686804", abstract = "Nowadays, it becomes a common practice to capture some data of sports games with devices such as GPS sensors and cameras and then use the data to perform various analyses on sports games, including tactics discovery, similar game retrieval, performance \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "218", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Kim:2024:TDG, author = "Min-Jeong Kim and Yeon-Chang Lee and David Y. Kang and Sang-Wook Kim", title = "Trustworthiness-Driven Graph Convolutional Networks for Signed Network Embedding", journal = j-TKDD, volume = "18", number = "9", pages = "219:1--219:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3685279", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Nov 21 05:55:25 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3685279", abstract = "The problem of representing nodes in a signed network as low-dimensional vectors, known as signed network embedding (SNE), has garnered considerable attention in recent years. While several SNE methods based on graph convolutional networks (GCNs) have \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "219", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Gong:2024:VAV, author = "Yan Gong and Georgina Cosma and Axel Finke", title = "{VITR}: Augmenting Vision Transformers with Relation-Focused Learning for Cross-modal Information Retrieval", journal = j-TKDD, volume = "18", number = "9", pages = "220:1--220:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3686805", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Nov 21 05:55:25 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3686805", abstract = "The relations expressed in user queries are vital for cross-modal information retrieval. 
Relation-focused cross-modal retrieval aims to retrieve information that corresponds to these relations, enabling effective retrieval across different modalities. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "220", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ying:2024:FSD, author = "Wangyang Ying and Dongjie Wang and Haifeng Chen and Yanjie Fu", title = "Feature Selection as Deep Sequential Generative Learning", journal = j-TKDD, volume = "18", number = "9", pages = "221:1--221:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3687485", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Nov 21 05:55:25 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3687485", abstract = "Feature selection aims to identify the most pattern-discriminative feature subset. In prior literature, filter (e.g., backward elimination) and embedded (e.g., LASSO) methods have hyperparameters (e.g., top-$k$, score thresholding) and tie to specific \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "221", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2024:VNV, author = "Wei Liu and Leong Hou U 
and Shangsong Liang and Huaijie Zhu and Jianxing Yu and Yubao Liu and Jian Yin", title = "{VAE*}: a Novel Variational Autoencoder via Revisiting Positive and Negative Samples for Top-{$N$} Recommendation", journal = j-TKDD, volume = "18", number = "9", pages = "222:1--222:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3680552", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Nov 21 05:55:25 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3680552", abstract = "Due to the easy access, implicit feedback is often used for recommender systems. Compared with point-wise learning and pair-wise learning methods, list-wise rank learning methods have superior performance for top-$N$ recommendation. Recent solutions, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "222", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2024:SKG, author = "Zhu Wang and Fengxia Han and Shengjie Zhao", title = "A Survey on Knowledge Graph Related Research in Smart City Domain", journal = j-TKDD, volume = "18", number = "9", pages = "223:1--223:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3672615", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Nov 21 05:55:25 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3672615", abstract = "Knowledge graph employs the specific graph structure to store knowledge in the form of entities, relations, attributes, and so forth, which can effectively represent correlations among data and has been applied in many fields, including search engine \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov.
Data", articleno = "223", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2024:SCC, author = "Hongjun Wang and Yi Song and Wei Chen and Zhipeng Luo and Chongshou Li and Tianrui Li", title = "A Survey of Co-Clustering", journal = j-TKDD, volume = "18", number = "9", pages = "224:1--224:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3681793", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Nov 21 05:55:25 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3681793", abstract = "Co-clustering is to cluster samples and features simultaneously, which can also reveal the relationship between row clusters and column clusters. Therefore, lots of scientists have drawn much attention to conduct extensive research on it, and co-\ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "224", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Cheng:2024:NSM, author = "Kewei Cheng and Nesreen K. Ahmed and Ryan A. Rossi and Theodore Willke and Yizhou Sun", title = "Neural-Symbolic Methods for Knowledge Graph Reasoning: a Survey", journal = j-TKDD, volume = "18", number = "9", pages = "225:1--225:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3686806", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Nov 21 05:55:25 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3686806", abstract = "Neural symbolic knowledge graph (KG) reasoning offers a promising approach that combines the expressive power of symbolic reasoning with the learning capabilities inherent in neural networks.
This survey provides a comprehensive overview of advancements, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "225", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Tan:2024:MLA, author = "Tao Tan and Hong Xie and Xiaoyu Shi and Mingsheng Shang", title = "A Meta-Learning Approach to Mitigating the Estimation Bias of {$Q$}-Learning", journal = j-TKDD, volume = "18", number = "9", pages = "226:1--226:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3688849", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Nov 21 05:55:25 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3688849", abstract = "It is a longstanding problem that Q-learning suffers from the overestimation bias. This issue originates from the fact that Q-learning uses the expectation of maximum Q-value to approximate the maximum expected Q-value. A number of algorithms, such as \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "226", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Tan:2024:GRL, author = "Jun Tan and Zhifeng Qiu and Ning Gui", title = "Graph Representation Learning Enhanced Semi-Supervised Feature Selection", journal = j-TKDD, volume = "18", number = "9", pages = "227:1--227:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3689428", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Nov 21 05:55:25 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3689428", abstract = "Feature selection is a key step in machine learning by eliminating features that are not related to the modeling target to create reliable and interpretable models. By exploring the potential complex correlations among features of unlabeled data, recently \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "227", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2024:PGP, author = "Keyi Li and Sen Yang and Travis M. Sullivan and Randall S. Burd and Ivan Marsic", title = "{ProcessGAN}: Generating Privacy-Preserving Time-Aware Process Data with Conditional Generative Adversarial Nets", journal = j-TKDD, volume = "18", number = "9", pages = "228:1--228:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3687464", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Nov 21 05:55:25 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3687464", abstract = "Process data constructed from event logs provides valuable insights into procedural dynamics over time. 
The confidential information in process data, together with the data's intricate nature, makes the datasets not sharable and challenging to collect. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "228", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ren:2024:TCL, author = "Jiaqian Ren and Hao Peng and Lei Jiang and Zhifeng Hao and Jia Wu and Shengxiang Gao and Zhengtao Yu and Qiang Yang", title = "Toward Cross-Lingual Social Event Detection with Hybrid Knowledge Distillation", journal = j-TKDD, volume = "18", number = "9", pages = "229:1--229:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3689948", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Nov 21 05:55:25 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3689948", abstract = "Recently published graph neural networks (GNNs) show promising performance at social event detection tasks. However, most studies are oriented toward monolingual data in languages with abundant training samples. This has left the common lesser-spoken \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "229", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yin:2024:SSP, author = "Nan Yin and Li Shen and Chong Chen and Xian-Sheng Hua and Xiao Luo", title = "{SPORT}: a Subgraph Perspective on Graph Classification with Label Noise", journal = j-TKDD, volume = "18", number = "9", pages = "230:1--230:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3687468", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Nov 21 05:55:25 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3687468", abstract = "Graph neural networks (GNNs) have achieved great success recently on graph classification tasks using supervised end-to-end training. Unfortunately, extensive noisy graph labels could exist in the real world because of the complicated processes of manual \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "230", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yu:2024:HNM, author = "Shuo Yu and Feng Xia and Honglong Chen and Ivan Lee and Lianhua Chi and Hanghang Tong", title = "Heterogeneous Network Motif Coding, Counting, and Profiling", journal = j-TKDD, volume = "18", number = "9", pages = "231:1--231:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3687465", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Nov 21 05:55:25 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3687465", abstract = "Network motifs, as a fundamental higher-order structure in large-scale networks, have received significant attention over recent years. 
Particularly in heterogeneous networks, motifs offer a higher capacity to uncover diverse information compared to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "231", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Feng:2024:STA, author = "Jiahui Feng and Hefu Liu and Jingmei Zhou and Yang Zhou", title = "A Spatial-Temporal Aggregated Graph Neural Network for Docked Bike-sharing Demand Forecasting", journal = j-TKDD, volume = "18", number = "9", pages = "232:1--232:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3690388", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Nov 21 05:55:25 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3690388", abstract = "Predicting the number of rented and returned bikes at each station is crucial for operators to proactively manage shared bike relocation. Although existing research has proposed spatial-temporal prediction models that significantly advance traffic \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "232", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2024:FSI, author = "Yidong Wang and Meng Ding and Jinhui Xu and Di Wang", title = "Fair Single Index Model", journal = j-TKDD, volume = "18", number = "9", pages = "233:1--233:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3690646", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Nov 21 05:55:25 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3690646", abstract = "Single index models (SIMs) have been widely used in various applications due to their simplicity and interpretability. However, despite the potential for SIMs to result in discriminatory outcomes based on sensitive attributes like gender, race, or \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "233", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Cribeiro-Ramallo:2024:EGH, author = "Jose Cribeiro-Ramallo and Vadim Arzamasov and Klemens B{\"o}hm", title = "Efficient Generation of Hidden Outliers for Improved Outlier Detection", journal = j-TKDD, volume = "18", number = "9", pages = "234:1--234:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3690827", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Nov 21 05:55:25 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3690827", abstract = "Outlier generation is a popular technique used to solve important outlier detection tasks. Generating outliers with realistic behavior is challenging. 
Popular existing methods tend to disregard the ``multiple views'' property of outliers in high-dimensional \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "234", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Huang:2024:LIS, author = "Longji Huang and Jianbin Huang and He Li and Jiangtao Cui", title = "{LSTGCN}: Inductive Spatial Temporal Imputation Using Long Short-Term Dependencies", journal = j-TKDD, volume = "18", number = "9", pages = "235:1--235:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3690645", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Nov 21 05:55:25 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3690645", abstract = "Spatial temporal forecasting of urban sensors is essentially important for many urban systems, such as intelligent transportation and smart cities. However, due to the problem of hardware failure or network failure, there are some missing values or \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "235", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2024:MLE, author = "Shucheng Li and Jingzhou Zhu and Boyu Chang and Hao Wu and Fengyuan Xu and Sheng Zhong", title = "Multi-Label and Evolvable Dataset Preparation for {Web}-Based Object Detection", journal = j-TKDD, volume = "18", number = "9", pages = "236:1--236:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3695465", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Nov 21 05:55:25 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3695465", abstract = "In this article, we focus on the emerging field of web-based object detection, which has gained considerable attention due to its ability to utilize large amounts of web data for training, thus eliminating the need for labor-intensive manual annotations. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "236", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Oh:2024:FSR, author = "Sejoon Oh and Berk Ustun and Julian McAuley and Srijan Kumar", title = "{FINEST}: Stabilizing Recommendations by Rank-Preserving Fine-Tuning", journal = j-TKDD, volume = "18", number = "9", pages = "237:1--237:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3695256", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Nov 21 05:55:25 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3695256", abstract = "Modern recommender systems may output considerably different recommendations due to small perturbations in the training data.
Changes in the data from a single user will alter the recommendations as well as the recommendations of other users. In \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "237", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2024:NTB, author = "Yifan Li and Shuhan Qi and Xuan Wang and Jiajia Zhang and Lei Cui", title = "A Novel Tree-Based Method for Interpretable Reinforcement Learning", journal = j-TKDD, volume = "18", number = "9", pages = "238:1--238:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3695464", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Nov 21 05:55:25 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3695464", abstract = "Deep reinforcement learning (DRL) has garnered remarkable success across various domains, propelled by advancements in deep learning (DL) technologies. However, the opacity of DL presents significant challenges, limiting the application of DRL in critical \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "238", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yang:2025:EPT, author = "Guangqian Yang and Lei Zhang and Yi Liu and Hongtao Xie and Zhendong Mao", title = "Exploiting Pre-Trained Language Models for Black-Box Attack against Knowledge Graph Embeddings", journal = j-TKDD, volume = "19", number = "1", pages = "1:1--1:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3688850", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Feb 4 06:19:23 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3688850", abstract = "Despite the emerging research on adversarial attacks against knowledge graph embedding (KGE) models, most of them focus on white-box attack settings. However, white-box attacks are difficult to apply in practice compared to black-box attacks since they \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "1", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yu:2025:HEC, author = "Xiao Yu and Hui Liu and Yan Zhang and Yuxiu Lin and Caiming Zhang", title = "Hubness-Enabled Clustering and Recovery for Large-Scale Incomplete Multi-View Data", journal = j-TKDD, volume = "19", number = "1", pages = "2:1--2:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3694689", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Feb 4 06:19:23 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3694689", abstract = "Incomplete multi-view clustering has gained considerable attention in recent years due to the prevalence of incomplete multi-view data in real-world applications.
However, existing methods often struggle to effectively deal with large-scale datasets, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "2", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Harel:2025:SMF, author = "Omer David Harel and Robert Moskovitch", title = "{STORM}: a {MapReduce} Framework for Symbolic Time Intervals Series Classification", journal = j-TKDD, volume = "19", number = "1", pages = "3:1--3:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3694788", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Feb 4 06:19:23 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3694788", abstract = "Symbolic Time Intervals (STIs) represent events having a non-zero time duration, which are common in various application domains. In this article, we focus on the challenge of STIs series classification (STIC). While in the related problem of time series \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "3", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Lei:2025:LDG, author = "Fatang Lei and Chao Zhang and Huaxiong Li and Yang Gao and Chunlin Chen", title = "Label Distribution Guided Hashing for Cross-Modal Retrieval", journal = j-TKDD, volume = "19", number = "1", pages = "4:1--4:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3697353", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Feb 4 06:19:23 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3697353", abstract = "Hashing methods have recently attracted extensive attention in cross-modal retrieval. 
Most supervised hashing methods attempt to preserve the semantic information into hash codes by leveraging the original logical label matrix. However, they generally \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "4", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2025:AST, author = "Zhiwen Zhang and Hongjun Wang and Zipei Fan and Xuan Song and Ryosuke Shibasaki", title = "Assessing the Spatial-Temporal Causal Impact of {COVID-19}-Related Policies on Epidemic Spread", journal = j-TKDD, volume = "19", number = "1", pages = "5:1--5:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3697841", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Feb 4 06:19:23 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3697841", abstract = "Analyzing the causal impact of various government-related policies on the epidemic spread is of critical importance. This article aims to investigate the problem of assessing the causal effects of different COVID-19-related policies on the USA epidemic \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov.
Data", articleno = "5", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Hu:2025:ITS, author = "Hanwen Hu and Shiyou Qian and Dingyu Yang and Jian Cao and Guangtao Xue", title = "Iterative Time Series Imputation by Maintaining Dependency Consistency", journal = j-TKDD, volume = "19", number = "1", pages = "6:1--6:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3698107", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Feb 4 06:19:23 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3698107", abstract = "Data imputation is crucial in the analysis of incomplete time series, such as forecasting and classification, which involves learning dependencies among the observed values to infer missing ones. As there are no ground truths for missing values, the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "6", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Mao:2025:PMA, author = "Qingyang Mao and Zhi Li and Qi Liu and Likang Wu and Hefu Zhang and Enhong Chen", title = "Promoting Machine Abilities of Discovering and Utilizing Knowledge in a Unified Zero-Shot Learning Paradigm", journal = j-TKDD, volume = "19", number = "1", pages = "7:1--7:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3700444", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Feb 4 06:19:23 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3700444", abstract = "Knowledge discovery and utilization are two essential cognitive processes that enable humans to understand the world and extract new insights from their surroundings. 
These processes have motivated machine learning studies, particularly zero-shot (ZS) \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "7", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Mai:2025:FFG, author = "Chengyuan Mai and Tianchi Liao and Chuan Chen and Zibin Zheng", title = "{FGTL}: Federated Graph Transfer Learning for Node Classification", journal = j-TKDD, volume = "19", number = "1", pages = "8:1--8:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3699962", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Feb 4 06:19:23 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3699962", abstract = "Unsupervised multi-source domain transfer in federated scenario has become an emerging research direction, which can help unlabeled target domain to obtain the adapted model through source domains under privacy-preserving. However, when local data are \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "8", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ling:2025:CDU, author = "Zhaolong Ling and Bo Li and Yiwen Zhang and Peng Zhou and Xingyu Wu and Yuee Huang and Kui Yu and Xindong Wu", title = "Causal Discovery Using Weight-Based Conditional Independence Test", journal = j-TKDD, volume = "19", number = "1", pages = "9:1--9:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3687467", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Feb 4 06:19:23 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3687467", abstract = "Conditional Independence (CI) tests play an essential role in causal discovery from observational data, enabling the measurement of independence between two nodes. However, traditional CI tests ignore the imbalanced occurrence probabilities of node values, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "9", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{He:2025:SRC, author = "Bing He and Yibo Hu and Yeon-Chang Lee and Soyoung Oh and Gaurav Verma and Srijan Kumar", title = "A Survey on the Role of Crowds in Combating Online Misinformation: Annotators, Evaluators, and Creators", journal = j-TKDD, volume = "19", number = "1", pages = "10:1--10:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3694980", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Feb 4 06:19:23 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3694980", abstract = "Online misinformation poses a global risk with significant real-world consequences.
To combat misinformation, current research relies on professionals like journalists and fact-checkers for annotating and debunking false information while also developing \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "10", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chen:2025:SAN, author = "Donghui Chen and Ling Chen and Zongjiang Shang and Youdong Zhang and Bo Wen and Chenghu Yang", title = "Scale-Aware Neural Architecture Search for Multivariate Time Series Forecasting", journal = j-TKDD, volume = "19", number = "1", pages = "11:1--11:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3701038", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Feb 4 06:19:23 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3701038", abstract = "Multivariate time series (MTS) forecasting has attracted much attention in many intelligent applications. It is not a trivial task, as we need to consider both intra-variable dependencies and inter-variable dependencies. However, existing works are \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "11", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2025:LTA, author = "Hao Zhang and Ting-Zhu Huang and Xi-Le Zhao and Shuqin Zhang and Jin-Yu Xie and Tai-Xiang Jiang and Michael K. 
Ng", title = "Learnable Transform-Assisted Tensor Decomposition for Spatio-Irregular Multidimensional Data Recovery", journal = j-TKDD, volume = "19", number = "1", pages = "12:1--12:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3701235", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Feb 4 06:19:23 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3701235", abstract = "Tensor decompositions have been successfully applied to multidimensional data recovery. However, classical tensor decompositions are not suitable for emerging spatio-irregular multidimensional data (i.e., spatio-irregular tensor), whose spatial domain is \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "12", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liang:2025:PML, author = "Weichao Liang and Guangliang Gao and Lei Chen and Youquan Wang", title = "Partial Multi-Label Learning via Exploiting Instance and Label Correlations", journal = j-TKDD, volume = "19", number = "1", pages = "13:1--13:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3700879", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Feb 4 06:19:23 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3700879", abstract = "The goal of partial multi-label learning is to induce a multi-label classifier from partial multi-label data where each instance is annotated with a number of candidate labels but only a subset of them are valid. Many of the existing studies either fail \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "13", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2025:GCF, author = "Zhixiao Wang and Jiayu Zhao and Chengcheng Sun and Xiaobin Rui and Philip S. Yu", title = "A General Concave Fairness Framework for Influence Maximization Based on Poverty Reward", journal = j-TKDD, volume = "19", number = "1", pages = "14:1--14:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3701737", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Feb 4 06:19:23 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3701737", abstract = "Influence maximization (IM) aims to find a group of influential nodes as initial spreaders to maximize the influence spread over a network. Yet, traditional IM algorithms have not been designed with fairness in mind, resulting in discrimination against \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "14", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2025:DTE, author = "Haoqiang Liu and Weikang Su and Tong Li and Wenzhen Huang and Yong Li", title = "Digital Twin Enhanced Multi-Agent Reinforcement Learning for Large-Scale Mobile Network Coverage Optimization", journal = j-TKDD, volume = "19", number = "1", pages = "15:1--15:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3702644", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Feb 4 06:19:23 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3702644", abstract = "With the rapid advancement of communication technology and the exponential growth of mobile users, improving network coverage quality and throughput has become increasingly important. In particular, large-scale Base Station (BS) cooperative optimization \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "15", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Jaysawal:2025:SEO, author = "Bijay Prasad Jaysawal and Jen-Wei Huang", title = "{SOHUPDS+}: an Efficient One-phase Algorithm for Mining High Utility Patterns over a Data Stream", journal = j-TKDD, volume = "19", number = "1", pages = "16:1--16:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3702645", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Feb 4 06:19:23 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3702645", abstract = "Existing algorithms for mining high utility patterns over a data stream are two-phase algorithms that are not scalable due to the large number of candidates generation in the first phase, particularly when the minimum utility threshold is low. Moreover, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "16", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Sun:2025:QEW, author = "Jianwen Sun and Shangheng Du and Jianpeng Zhou and Xin Yuan and Xiaoxuan Shen and Ruxia Liang", title = "Question Embedding on Weighted Heterogeneous Information Network for Knowledge Tracing", journal = j-TKDD, volume = "19", number = "1", pages = "17:1--17:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3703158", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Feb 4 06:19:23 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3703158", abstract = "Knowledge Tracing (KT) aims to predict students' future performance on answering questions based on their historical exercise sequences. 
To alleviate the problem of data sparsity in KT, recent works have introduced auxiliary information to mine question \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "17", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Guan:2025:JSA, author = "Zhihao Guan and Jia-Qi Yang and Yang Yang and Hengshu Zhu and Wenjie Li and Hui Xiong", title = "{JobFormer}: Skill-Aware Job Recommendation with Semantic-Enhanced Transformer", journal = j-TKDD, volume = "19", number = "1", pages = "18:1--18:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3701735", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Feb 4 06:19:23 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3701735", abstract = "Job recommendation aims to provide potential talents with suitable job descriptions (JDs) consistent with their career trajectory, which plays an essential role in proactive talent recruitment. In real-world management scenarios, the available JD-user \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "18", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Hong:2025:MMD, author = "Xiaobin Hong and Jiangyi Hu and Taishan Xu and Xiancheng Ren and Feng Wu and Xiangkai Ma and Wenzhong Li", title = "{MagNet}: Multilevel Dynamic Wavelet Graph Neural Network for Multivariate Time Series Classification", journal = j-TKDD, volume = "19", number = "1", pages = "19:1--19:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3703915", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Feb 4 06:19:23 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3703915", abstract = "Multivariate Time Series Classification (MTSC) is a fundamental data mining task, which is widely applied in the fields like health care and energy management. However, the existing MTSC methods are mostly adapted from univariate versions and model the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "19", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wu:2025:GMB, author = "Yuchen Wu and Huandong Wang and Changzheng Gao and Depeng Jin and Yong Li", title = "{GeoGail}: a Model-Based Imitation Learning Framework for Human Trajectory Synthesizing", journal = j-TKDD, volume = "19", number = "1", pages = "20:1--20:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3699961", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Feb 4 06:19:23 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3699961", abstract = "Synthesized human trajectories are crucial for a large number of applications. 
Existing solutions are mainly based on the generative adversarial network (GAN), which is limited due to the lack of modeling the human decision-making process. In this \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "20", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Hang:2025:DPL, author = "Jun-Yi Hang and Min-Ling Zhang", title = "Dual Perspective of Label-Specific Feature Learning for Multi-Label Classification", journal = j-TKDD, volume = "19", number = "1", pages = "21:1--21:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3705006", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Feb 4 06:19:23 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3705006", abstract = "Label-specific features work as an effective supervised feature manipulation strategy to account for distinct discriminative properties of each class label in multi-label classification. Existing approaches implement this strategy in its primal form, i.e., \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov.
Data", articleno = "21", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Song:2025:CHT, author = "Yiwen Song and Jingtao Ding and Jian Yuan and Qingmin Liao and Yong Li", title = "Controllable Human Trajectory Generation Using Profile-Guided Latent Diffusion", journal = j-TKDD, volume = "19", number = "1", pages = "22:1--22:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3701736", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Feb 4 06:19:23 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3701736", abstract = "Trajectory generation is a vital element in AI applications. Firstly, it enables simulation such as traffic simulation and epidemic spreading modeling. Secondly, it can provide synthetic privacy-preserving data for training AI models. Notably, trajectory \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "22", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhu:2025:SDN, author = "Jiong Zhu and Aishwarya Reganti and Edward W. 
Huang and Charles Dickens and Nikhil Rao and Karthik Subbian and Danai Koutra", title = "Simplifying Distributed Neural Network Training on Massive Graphs: Randomized Partitions Improve Model Aggregation", journal = j-TKDD, volume = "19", number = "1", pages = "23:1--23:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3701563", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Feb 4 06:19:23 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3701563", abstract = "Distributed graph neural network (GNN) training facilitates learning on massive graphs that surpass the storage and computational capabilities of a single machine. Traditional distributed frameworks strive for performance parity with centralized training \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "23", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2025:MRT, author = "Kaijun Liu and Sijie Ruan and Cheng Long and Liang Yu", title = "Modeling On-road Trajectories with Multi-task Learning", journal = j-TKDD, volume = "19", number = "1", pages = "24:1--24:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3705005", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Feb 4 06:19:23 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3705005", abstract = "With the increasing popularity of GPS modules, there are various urban applications such as car navigation relying on trajectory data modeling. In this work, we study the problem of modeling on-road trajectories, which is to predict the next road segment \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "24", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{He:2025:SAM, author = "Kun He and Xiaodong Xin and Jialu Bao and Meng Wang and Bart Selman and John E. Hopcroft", title = "Structure Amplification on Multi-layer Stochastic Block Models", journal = j-TKDD, volume = "19", number = "1", pages = "25:1--25:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3706111", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Feb 4 06:19:23 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3706111", abstract = "Much of the complexity of social, biological, and engineering systems arises from the complicated interactions among the entities in the corresponding networks. A number of network analysis tools have been successfully used to discover latent structures \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "25", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Qin:2025:MAN, author = "Yalan Qin and Nan Pu and Hanzhou Wu and Nicu Sebe", title = "Margin-aware Noise-robust Contrastive Learning for Partially View-aligned Problem", journal = j-TKDD, volume = "19", number = "1", pages = "26:1--26:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3707646", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Feb 4 06:19:23 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3707646", abstract = "In this article, we study a challenging problem in contrastive learning when just a portion of data is aligned in multi-view dataset due to temporal, spatial, or spatio-temporal asynchronism across views. 
It is important to study partially view-aligned \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "26", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liang:2025:DFR, author = "Ruicheng Liang and Yuanchun Jiang and Feida Zhu and Ling Cheng and Huiwen Liu", title = "Defending Federated Recommender Systems against Untargeted Attacks: a Contribution-Aware Robust Aggregation Scheme", journal = j-TKDD, volume = "19", number = "1", pages = "27:1--27:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3706112", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Tue Feb 4 06:19:23 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", URL = "https://dl.acm.org/doi/10.1145/3706112", abstract = "Federated recommender systems (FedRSs) effectively tackle the tradeoff between recommendation accuracy and privacy preservation. However, recent studies have revealed severe vulnerabilities in FedRSs, particularly against untargeted attacks seeking to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "27", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Aydin:2025:BFE, author = "Soner Aydin and Sinan Yildirim", title = "{Bayesian} Frequency Estimation under Local Differential Privacy with an Adaptive Randomized Response Mechanism", journal = j-TKDD, volume = "19", number = "2", pages = "28:1--28:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3706584", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:10 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Frequency estimation plays a critical role in many applications involving personal and private categorical data. Such data are often collected sequentially over time, making it valuable to estimate their distribution online while preserving privacy. We \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "28", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2025:AAS, author = "Zimu Wang and Hao Zou and Jiashuo Liu and Jiayun Wu and Pengfei Tian and Yue He and Peng Cui", title = "{AdaptSel}: Adaptive Selection of Biased and Debiased Recommendation Models for Varying Test Environments", journal = j-TKDD, volume = "19", number = "2", pages = "29:1--29:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3706637", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:10 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Recommendation systems are frequently challenged by pervasive biases in the training set that can compromise model effectiveness. To address this issue, various debiasing techniques have been developed to eliminate biases and produce debiased models. 
\ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "29", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Cai:2025:PPT, author = "Qiqi Cai and Jian Cao and Yirong Chen and Shiyou Qian and Liangxiao Yuan and Jie Wang", title = "{PREFER}: a Pre-trained Model Recommendation Framework for Edge Computing Enabled Traffic Flow Prediction", journal = j-TKDD, volume = "19", number = "2", pages = "30:1--30:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3707464", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:10 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "The recent years have witnessed a surge in the development of traffic flow prediction methods, often deployed on cloud platforms to offer predictive services for entire transportation networks. However, the processes of training and executing a model for \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "30", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{He:2025:ARS, author = "Qiang He and Zelin Zhang and Tingting Bi and Hui Fang and Xiushuang Yi and Keping Yu", title = "Adaptive Rumor Suppression on Social Networks: a Multi-Round Hybrid Approach", journal = j-TKDD, volume = "19", number = "2", pages = "31:1--31:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3701738", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:10 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Rumor suppression is targeted at diminishing the impact of false and negative information within social networks by decreasing the prevalence of belief in such rumors among individuals, utilizing diverse strategies. Previous studies have broadly \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "31", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Mao:2025:LKD, author = "Zhengyang Mao and Wei Ju and Siyu Yi and Yifan Wang and Zhiping Xiao and Qingqing Long and Nan Yin and Xinwang Liu and Ming Zhang", title = "Learning Knowledge-diverse Experts for Long-tailed Graph Classification", journal = j-TKDD, volume = "19", number = "2", pages = "32:1--32:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3705323", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:10 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Graph neural networks (GNNs) have shown remarkable success in graph-level classification tasks. However, most of the existing GNN-based studies are based on balanced datasets, while many real-world datasets exhibit long-tailed distributions. 
In such \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "32", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Xia:2025:MHT, author = "Haisong Xia and Wanyue Xu and Zuobai Zhang and Zhongzhi Zhang", title = "Means of Hitting Times for Random Walks on Graphs: Connections, Computation, and Optimization", journal = j-TKDD, volume = "19", number = "2", pages = "33:1--33:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3708561", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:10 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "For random walks on graph $ \mathcal {G} $ with $n$ vertices and $m$ edges, the mean hitting time $ H_j $ from a vertex chosen from the stationary distribution to vertex $j$ measures the importance for $j$, while the Kemeny constant \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "33", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Di:2025:ELB, author = "Shimin Di and Yongqi Zhang and Quanming Yao and Xiaofang Zhou and Lei Chen", title = "Efficient Latent-based Scoring Function Search for {$N$}-ary Relational Knowledge Bases", journal = j-TKDD, volume = "19", number = "2", pages = "34:1--34:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3707644", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:10 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Designing a proper scoring function is the key to ensuring the excellent performance of knowledge base (KB) embedding. 
Recently, the scoring function search method introduces the automated machine learning technique to design the data-aware scoring \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "34", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Moghaddam:2025:DTA, author = "Arya Hadizadeh Moghaddam and Mohsen Nayebi Kerdabadi and Bin Liu and Mei Liu and Zijun Yao", title = "Discovering Time-aware Hidden Dependencies with Personalized Graphical Structure in Electronic Health Records", journal = j-TKDD, volume = "19", number = "2", pages = "35:1--35:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3709143", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:10 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Over the past decade, significant advancements in mining electronic health records (EHRs) have enabled a broad range of decision-support applications and offered an unprecedented capacity for predicting critical events such as disease prognosis and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "35", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2025:MAL, author = "Huan Wang and Yu Teng and Lingsong Qin and Xuan Guo and Po Hu", title = "A Multiple Attention Layer-shareable Method for Link Prediction in Multilayer Networks", journal = j-TKDD, volume = "19", number = "2", pages = "36:1--36:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3709142", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:10 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Link prediction in multilayer networks aims to predict missing links at the target layer by incorporating structural information from both auxiliary layers and the target layer. Existing methods tend to learn layer-specific knowledge to maximize the link \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "36", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2025:CGC, author = "Yingxue Zhang and Yanhua Li and Xun Zhou and Zhenming Liu and Jun Luo", title = "{C$^3$-GAN+}: Complex-Condition-Controlled Generative Adversarial Networks with Enhanced Embedding", journal = j-TKDD, volume = "19", number = "2", pages = "37:1--37:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3712264", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:10 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Given historical traffic distributions and associated urban conditions observed in a city, the conditional urban traffic estimation problem aims at estimating realistic future projections of the traffic under a set of new urban conditions, e.g., new bus \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "37", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Hu:2025:POA, author = "Hanwen Hu and Zhangchi Han and Shiyou Qian and Dingyu Yang and Jian Cao and Guangtao Xue", title = "Pattern-oriented Attention Mechanism for Multivariate Time Series Forecasting", journal = j-TKDD, volume = "19", number = "2", pages = "38:1--38:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3712606", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:10 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Multivariate time series forecasting is applied in many domains, such as finance, transportation, and industry. The main challenge of precise forecasting lies in accurately capturing latent dependencies. 
Recent studies develop various frameworks to reduce \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "38", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Fan:2025:ISI, author = "Wenqi Fan and Shu Zhao and Jiliang Tang", title = "Introduction for the Special Issue on Trustworthy Artificial Intelligence", journal = j-TKDD, volume = "19", number = "2", pages = "39:1--39:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3712184", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:10 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "39", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Hu:2025:BFC, author = "Zhihao Hu and Yiran Xu and Mengnan Du and Jindong Gu and Xinmei Tian and Fengxiang He", title = "Boosting Fair Classifier Generalization through Adaptive Priority Reweighing", journal = j-TKDD, volume = "19", number = "2", pages = "40:1--40:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3665895", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:10 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "With the increasing penetration of machine learning applications in critical decision-making areas, calls for algorithmic fairness are more prominent. Although there have been various modalities to improve algorithmic fairness through learning with \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "40", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2025:QCM, author = "Hengyuan Zhang and Zitao Liu and Chenming Shang and Dawei Li and Yong Jiang", title = "A Question-centric Multi-experts Contrastive Learning Framework for Improving the Accuracy and Interpretability of Deep Sequential Knowledge Tracing Models", journal = j-TKDD, volume = "19", number = "2", pages = "41:1--41:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3674840", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:10 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Knowledge tracing (KT) plays a crucial role in predicting students' future performance by analyzing their historical learning processes. Deep neural networks (DNNs) have shown great potential in solving the KT problem. However, there still exist some \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "41", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Rong:2025:EGE, author = "Yao Rong and Guanchu Wang and Qizhang Feng and Ninghao Liu and Zirui Liu and Enkelejda Kasneci and Xia Hu", title = "Efficient {GNN} Explanation via Learning Removal-based Attribution", journal = j-TKDD, volume = "19", number = "2", pages = "42:1--42:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3685678", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:10 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "As Graph Neural Networks (GNNs) have been widely used in real-world applications, model explanations are required not only by users but also by legal regulations. 
However, simultaneously achieving high fidelity and low computational costs in generating \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "42", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2025:SAR, author = "Sihang Li and Yanchen Luo and An Zhang and Xiang Wang and Longfei Li and Jun Zhou and Tat-Seng Chua", title = "Self-attentive Rationalization for Interpretable Graph Contrastive Learning", journal = j-TKDD, volume = "19", number = "2", pages = "43:1--43:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3665894", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:10 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Graph augmentation is the key component to reveal instance-discriminative features of a graph as its rationale---an interpretation for it---in graph contrastive learning (GCL). Existing rationale-aware augmentation mechanisms in GCL frameworks roughly fall \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov.
Data", articleno = "43", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Lin:2025:FNL, author = "Guo Lin and Yongfeng Zhang", title = "Fuzzy Neural Logic Reasoning for Robust Classification", journal = j-TKDD, volume = "19", number = "2", pages = "44:1--44:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3704728", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:10 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "The efficacy of neural networks is widely recognized across a multitude of machine learning tasks, yet their black-box nature impedes the understanding of their decision-making processes. Such lack of explainability limits their use in high-stake fields \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "44", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Dai:2025:TPB, author = "Enyan Dai and Suhang Wang", title = "Towards Prototype-Based Self-Explainable Graph Neural Network", journal = j-TKDD, volume = "19", number = "2", pages = "45:1--45:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3689647", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:10 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Graph Neural Networks (GNNs) have shown great ability in modeling graph-structured data for various domains. However, GNNs are known as black-box models that lack interpretability. Without understanding their inner working, we cannot fully trust them, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "45", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chuang:2025:FRM, author = "Yu-Neng Chuang and Kwei-Herng Lai and Ruixiang Tang and Mengnan Du and Chia-Yuan Chang and Na Zou and Xia Hu", title = "{Fair-RGNN}: Mitigating Relational Bias on Knowledge Graphs", journal = j-TKDD, volume = "19", number = "2", pages = "46:1--46:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3681792", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:10 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Knowledge graph data are prevalent in real-world applications, and knowledge graph neural networks (KGNNs) are essential techniques for knowledge graph representation learning. Although KGNN effectively models the structural information from knowledge \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "46", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Huang:2025:NNC, author = "Yuxi Huang and Huandong Wang and Guanghua Liu and Yong Li and Tao Jiang", title = "{NeuralCODE}: Neural Compartmental Ordinary Differential Equations Model with {AutoML} for Interpretable Epidemic Forecasting", journal = j-TKDD, volume = "19", number = "2", pages = "47:1--47:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3694688", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:10 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "In order to prevent the re-emergence of an epidemic, predicting its trend while gaining insight into the intrinsic factors affecting it is a key issue in urban governance. 
Traditional SIR-like compartment models provide insight into the explanatory \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "47", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Qian:2025:CUI, author = "Fulan Qian and Yuanjun Zou and Mengyao Xu and Xuejun Zhang and Chonghao Zhang and Chenchu Xu and Hai Chen", title = "A Comprehensive Understanding of the Impact of Data Augmentation on the Transferability of {$3$D} Adversarial Examples", journal = j-TKDD, volume = "19", number = "2", pages = "48:1--48:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3673232", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:10 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "3D point cloud classifiers exhibit vulnerability to imperceptible perturbations, which poses a serious threat to the security and reliability of deep learning models in practical applications, making the robustness evaluation of deep 3D point cloud models \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "48", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Xiao:2025:IFI, author = "Meng Xiao and Min Wu and Ziyue Qiao and Yanjie Fu and Zhiyuan Ning and Yi Du and Yuanchun Zhou", title = "Interdisciplinary Fairness in Imbalanced Research Proposal Topic Inference: a Hierarchical Transformer-based Method with Selective Interpolation", journal = j-TKDD, volume = "19", number = "2", pages = "49:1--49:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3671149", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:10 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "The objective of topic inference in research proposals aims to obtain the most suitable disciplinary division from the discipline system defined by a funding agency. The agency will subsequently find appropriate peer-review experts from their database \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "49", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ma:2025:BRT, author = "Li Ma and Yongchao Liu and Xiaofeng Gao and Peng Zhang and Chuntao Hong", title = "Building Robust and Trustworthy {HGNN} Models: a Learnable Threshold Approach for Node Classification", journal = j-TKDD, volume = "19", number = "2", pages = "50:1--50:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3707645", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:10 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Message passing scheme is a general idea for Graph Neural Networks (GNNs) to learn node representations. 
During message passing, given a target node, we transform and aggregate the feature vectors of its neighbors and generate a representation vector for \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "50", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Sui:2025:SDA, author = "Yongduo Sui and Shuyao Wang and Jie Sun and Zhiyuan Liu and Qing Cui and Longfei Li and Jun Zhou and Xiang Wang and Xiangnan He", title = "A Simple Data Augmentation for Graph Classification: a Perspective of Equivariance and Invariance", journal = j-TKDD, volume = "19", number = "2", pages = "51:1--51:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3706062", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:10 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "In graph classification, the out-of-distribution (OOD) issue is attracting great attention. To address this issue, a prevailing idea is to learn stable features, on the assumption that they are substructures causally determining the label and that their \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "51", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Fan:2025:EPT, author = "Mingyuan Fan and Cen Chen and Chengyu Wang and Jun Huang", title = "Exploiting Pre-Trained Models and Low-Frequency Preference for Cost-Effective Transfer-based Attack", journal = j-TKDD, volume = "19", number = "2", pages = "52:1--52:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3680553", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:10 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "The transferability of adversarial examples enables practical transfer-based attacks. However, existing theoretical analysis cannot effectively reveal what factors contribute to cross-model transferability. Furthermore, the assumption that the target \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "52", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2025:KTL, author = "Haochun Wang and Sendong Zhao and Zewen Qiang and Zijian Li and Chi Liu and Nuwa Xi and Yanrui Du and Bing Qin and Ting Liu", title = "Knowledge-tuning Large Language Models with Structured Medical Knowledge Bases for Trustworthy Response Generation in {Chinese}", journal = j-TKDD, volume = "19", number = "2", pages = "53:1--53:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3686807", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:10 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Large Language Models (LLMs) have demonstrated remarkable success in diverse natural language processing (NLP) tasks in general domains. 
However, LLMs sometimes generate responses with the hallucination about medical facts due to limited domain knowledge. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "53", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Cui:2025:SHA, author = "Peng Cui and Zhijie Deng and Wenbo Hu and Jun Zhu", title = "{SDE-HNN}: Accurate and Well-Calibrated Forecasting Using Stochastic Differential Equations", journal = j-TKDD, volume = "19", number = "2", pages = "54:1--54:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3691346", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:10 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "It is crucial yet challenging for deep learning models to properly characterize uncertainty that is pervasive in real-world environments. Heteroscedastic neural networks (HNNs) are promising methods that capture data uncertainty for forecasting problems \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "54", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2025:SBM, author = "Xueyan Liu and Wenzhuo Song and Katarzyna Musial and Yang Li and Xuehua Zhao and Bo Yang", title = "Stochastic Block Models for Complex Network Analysis: a Survey", journal = j-TKDD, volume = "19", number = "3", pages = "55:1--55:??", month = apr, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3713076", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:12 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Complex networks enable to represent and characterize the interactions between entities in various complex systems which widely exist in the real world and usually generate vast amounts of data about all the elements, their behaviors and interactions over \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "55", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Guo:2025:IDL, author = "Mengzhuo Guo and Qingpeng Zhang and Daniel Dajun Zeng", title = "An Interpretable Deep Learning-based Model for Decision-making through Piecewise Linear Approximation", journal = j-TKDD, volume = "19", number = "3", pages = "56:1--56:??", month = apr, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3715150", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:12 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Full-complexity machine learning models, such as the deep neural network, are non-traceable black-box, whereas the classic interpretable models, such as linear regression models, are often over-simplified, leading to lower accuracy. 
Model interpretability \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "56", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2025:HHH, author = "Yongkang Li and Zipei Fan and Xuan Song", title = "Heterogeneous Hyperbolic Hypergraph Neural Network for Friend Recommendation in Location-based Social Networks", journal = j-TKDD, volume = "19", number = "3", pages = "57:1--57:??", month = apr, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3708999", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:12 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Friend recommendation is an important real-world application in Location-based Social Networks (LBSN), helping users discover potential friends and enhance their overall happiness. LBSN mainly comprises two distinct data structures: spatio-temporal data \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "57", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yao:2025:EDS, author = "Dezhong Yao and Sanmu Li and Zhiwei Wang and Peilin Zhao and Gang Wu and Chen Yu and Hai Jin", title = "Efficient Distributed Sparse Relative Similarity Learning", journal = j-TKDD, volume = "19", number = "3", pages = "58:1--58:??", month = apr, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3712603", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:12 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Learning a good similarity measure for large-scale high-dimensional data is a crucial task in machine learning applications, yet it poses a significant challenge. 
Distributed minibatch Stochastic Gradient Descent (SGD) serves as an efficient optimization \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "58", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Gao:2025:DMG, author = "Yibo Gao and Zhen Liu and Xinxin Yang and Sibo Lu and Yafan Yuan", title = "Disentangled Multi-Graph Convolution for Cross-Domain Recommendation", journal = j-TKDD, volume = "19", number = "3", pages = "59:1--59:??", month = apr, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3715151", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:12 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Data sparsity poses a significant challenge for recommendation systems, prompting the research of Cross-Domain Recommendation (CDR). CDR aims to leverage more user-item interaction information from source domains to improve the recommendation performance \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "59", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Shao:2025:NCS, author = "Yuanhang Shao and Xiuwen Liu", title = "Nonlinear Correct and Smooth for Graph-Based Semi-Supervised Learning", journal = j-TKDD, volume = "19", number = "3", pages = "60:1--60:??", month = apr, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3712604", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:12 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Graph-based semi-supervised learning (GSSL) has achieved significant success across various applications by leveraging the graph structure and labeled samples for classification tasks. 
In the field of GSSL, Label Propagation (LP) and Graph Neural Networks \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "60", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2025:DVL, author = "Huan Zhang and Liangxiao Jiang and Wenjun Zhang and Geoffrey I. Webb", title = "Dual-View Learning from Crowds", journal = j-TKDD, volume = "19", number = "3", pages = "61:1--61:??", month = apr, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3712605", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:12 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Crowdsourcing services provide a fast and cheap way to obtain substantial labeled data by employing crowd workers on the Internet. In crowdsourcing learning, two-stage methods have been widely used, which first infer the integrated label for each instance \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "61", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Lin:2025:SFP, author = "Peng Lin and Martin Neil and Norman Fenton", title = "Stacking Factorizing Partitioned Expressions in Hybrid {Bayesian} Network Models", journal = j-TKDD, volume = "19", number = "3", pages = "62:1--62:??", month = apr, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3714473", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:12 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Hybrid Bayesian networks (HBN) contain complex conditional probability distributions (CPD) specified as partitioned expressions over discrete and continuous variables. 
The size of these CPDs grows exponentially with the number of parent nodes, and when \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "62", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2025:FWP, author = "Sisi Wang and Feiping Nie and Zheng Wang and Rong Wang and Xuelong Li", title = "Fuzzy Weighted Principal Component Analysis for Anomaly Detection", journal = j-TKDD, volume = "19", number = "3", pages = "63:1--63:??", month = apr, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3715148", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:12 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Principal Component Analysis (PCA) is one of the most famous unsupervised dimensionality reduction algorithms and has been widely used in many fields. However, it is very sensitive to outliers, which reduces the robustness of the algorithm. In recent \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "63", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Abdallah:2025:EFT, author = "Mustafa Abdallah and Ryan A. Rossi and Kanak Mahadik and Sungchul Kim and Handong Zhao and Saurabh Bagchi", title = "Evaluation-free Time-series Forecasting Model Selection via Meta-learning", journal = j-TKDD, volume = "19", number = "3", pages = "64:1--64:??", month = apr, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3715149", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:12 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Time-series forecasting models are invariably used in a variety of domains for crucial decision-making. 
Traditionally these models are constructed by experts with considerable manual effort. Unfortunately, this approach has poor scalability while \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "64", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhou:2025:OSS, author = "Peng Zhou and Qi Wang and Yunyun Zhang and Zhaolong Ling and Shu Zhao and Xindong Wu", title = "Online Stable Streaming Feature Selection via Feature Aggregation", journal = j-TKDD, volume = "19", number = "3", pages = "65:1--65:??", month = apr, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3715918", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:12 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Feature selection is an essential pre-process component in data mining that aims to select the most relevant features from the target dataset. Datasets are always dynamic in real-world applications, and features may exist in stream mode. Then, online \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "65", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Paul:2025:MMT, author = "Jayanta Paul and Siddhartha Mallick and Abhijit Mitra and Anuska Roy and Jaya Sil", title = "Multi-modal {Twitter} Data Analysis for Identifying Offensive Posts Using a Deep Cross-Attention-based Transformer Framework", journal = j-TKDD, volume = "19", number = "3", pages = "66:1--66:??", month = apr, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3713077", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:12 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "In today's society dissemination of information among the individuals occur very rapidly due to the widespread usage of social media platforms like Twitter (now-a-days acclaimed as X). However, information may pose challenges to maintaining a healthy \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "66", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Xu:2025:GII, author = "Paiheng Xu and Yuhang Zhou and Bang An and Wei Ai and Furong Huang", title = "{GFairHint}: Improving Individual Fairness for Graph Neural Networks via Fairness Hint", journal = j-TKDD, volume = "19", number = "3", pages = "67:1--67:??", month = apr, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3714472", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:12 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Given the growing concerns about fairness in machine learning and the impressive performance of Graph Neural Networks (GNNs) on graph data learning, algorithmic fairness in GNNs has attracted significant attention. 
While many existing studies improve \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "67", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Susnjak:2025:ARS, author = "Teo Susnjak and Peter Hwang and Napoleon Reyes and Andre L. C. Barczak and Timothy McIntosh and Surangika Ranathunga", title = "Automating Research Synthesis with Domain-Specific Large Language Model Fine-Tuning", journal = j-TKDD, volume = "19", number = "3", pages = "68:1--68:??", month = apr, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3715964", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:12 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "This research pioneers the use of fine-tuned Large Language Models (LLMs) to automate Systematic Literature Reviews (SLRs), presenting a significant and novel contribution in integrating AI to enhance academic research methodologies. Our study employed \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "68", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2025:TSC, author = "Chenyang Wang and Ling Luo and Uwe Aickelin", title = "Time Series Classification with Elasticity Using Augmented Path Signatures", journal = j-TKDD, volume = "19", number = "3", pages = "69:1--69:??", month = apr, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3715702", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:12 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "We often compare time-dependent data elastically such that some compression or dilation along the time dimension can be ignored, for example, spatial trajectories of vehicles moving at different speeds or accelerometer data for exercises completed at \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "69", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Gu:2025:TBK, author = "Xiao Gu and Ling Jian and Chongzhi Rao and Zhaohui Bu and Xianggang Cheng", title = "Together Is Better: Knowledge-aware Model with Resume Fusion for Online Job Recommendation", journal = j-TKDD, volume = "19", number = "3", pages = "70:1--70:??", month = apr, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3716503", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:12 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Widespread adoption of online recruitment platforms has led to explosive growth in employment information, resulting in an ever-increasing demand from job seekers for accurate and effective job recommendations. Existing studies on the Person-Job Fit \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. 
Discov. Data", articleno = "70", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Gallego-Fontenla:2025:GDD, author = "V{\'\i}ctor Gallego-Fontenla and Pedro Gamallo-Fernandez and Juan C. Vidal and Manuel Lama", title = "Gradual Drift Detection in Process Models Using Conformance Metrics", journal = j-TKDD, volume = "19", number = "3", pages = "71:1--71:??", month = apr, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3716169", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:12 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Changes, planned or unexpected, are common during the execution of real-life processes. Detecting these changes is a must for optimizing the performance of organizations running such processes. Most of the algorithms present in the state-of-the-art focus \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "71", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Lin:2025:MAB, author = "Mingwei Lin and Hengshuo Yang and Xiuqin Xu and Ling Lin and Zeshui Xu and Xin Luo", title = "Momentum-Accelerated and Biased Unconstrained Non-Negative Latent Factor Model for Handling High-Dimensional and Incomplete Data", journal = j-TKDD, volume = "19", number = "3", pages = "72:1--72:??", month = apr, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3717069", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:12 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "High-dimensional and incomplete (HDI) data are involved frequently in big data-related industrial applications. 
Latent factor (LF) analysis aims at extracting the knowledge of great value from such extremely sparse HDI data efficiently. Non-negative LF \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "72", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Jing:2025:CCC, author = "Xuechun Jing and Fuyuan Cao and Kui Yu and Jiye Liang", title = "{CM-CaFE}: a Clustering Method with Causality-based Feature Embedding", journal = j-TKDD, volume = "19", number = "3", pages = "73:1--73:??", month = apr, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3717068", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:12 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Clustering is a fundamental technique widely used for exploring the inherent data structure. Many studies indicate that an appropriate feature representation can effectively improve clustering performance. However, the existing feature representation \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "73", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhu:2025:SWL, author = "Yu Zhu and Ou Wu and Fengguang Su", title = "Subclass-wise Logit Perturbation for Multi-label Learning", journal = j-TKDD, volume = "19", number = "3", pages = "74:1--74:??", month = apr, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3715919", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:12 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Logit perturbation refers to adding perturbation on logit, which has been shown to be capable of enhancing the robustness and generalization capabilities of deep neural networks in machine learning. However, studies on logit perturbation for multi-label \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "74", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2025:SMC, author = "Jian Ying Liu and Chaowei Zhang and Min Zhang and Xiao Qin and Jifu Zhang", title = "Similarity Metrics: {Chebyshev} {Coulomb} Force and Resultant Force for High-Dimensional Data", journal = j-TKDD, volume = "19", number = "3", pages = "75:1--75:??", month = apr, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3715963", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:12 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "The similarity metric has garnered widespread attention thanks to its potential applications in the fields of data mining, machine learning, and so on. Due to the interference of ``distance concentration'' caused by ``Curse of dimensionality,'' however, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "75", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Kou:2025:OFI, author = "Yannian Kou and Qiuqiang Lin and Chuanhou Gao", title = "{ORIC}: Feature Interaction Detection through Online Random Interaction Chains for Click-Through Rate Prediction", journal = j-TKDD, volume = "19", number = "3", pages = "76:1--76:??", month = apr, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3717070", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:12 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Click-through rate prediction aims to predict the ratio of clicks to impressions of a specific link, which is challenging due to (1) extremely high-dimensional categorical features; (2) both important original features and their interactions; and (3) \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "76", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Han:2025:HEM, author = "Xiaolin Han and Yikun Zhang and Chenhao Ma and Xuequn Shang and Reynold Cheng and Tobias Grubenmann and Xiaodong Li", title = "Hypergraph-Enhanced Multi-Granularity Stochastic Weight Completion in Sparse Road Networks", journal = j-TKDD, volume = "19", number = "3", pages = "77:1--77:??", month = apr, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3719013", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:12 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Road network applications, such as navigation, incident detection, and Point-of-Interest (POI) recommendation, make extensive use of network edge weights (e.g., traveling times). 
Some of these weights can be missing, especially in a road network where \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "77", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Luo:2025:EDA, author = "Xinjian Luo and Xianglong Zhang", title = "Exploiting Defenses against {GAN}-Based Feature Inference Attacks in Federated Learning", journal = j-TKDD, volume = "19", number = "3", pages = "78:1--78:??", month = apr, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3719350", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:12 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Federated Learning (FL) is a decentralized model training framework that aims to merge isolated data islands while maintaining data privacy. However, recent studies have revealed that Generative Adversarial Network (GAN)-based attacks can be employed in \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "78", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Fang:2025:OEO, author = "Xiangfei Fang and Chengying Huan and Heng Zhang and Yongchao Liu and Shaonan Ma and Yanjun Wu and Chen Zhao", title = "{OTM}: Efficient $k$-Order-Based Core Maintenance in Large-Scale Dynamic Hypergraphs", journal = j-TKDD, volume = "19", number = "3", pages = "79:1--79:??", month = apr, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3719205", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Apr 12 07:57:12 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "The $k$-core model has garnered widespread adoption for preserving essential cohesive subgraphs owing to its linear-time computability, making it particularly suitable for hypergraph analysis. However, considering the continuously evolving characteristics \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "79", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Xu:2025:AMB, author = "Xiuqin Xu and Mingwei Lin and Zeshui Xu and Xin Luo", title = "Attention-Mechanism-Based Neural Latent-Factorization-of-Tensors Model", journal = j-TKDD, volume = "19", number = "4", pages = "80:1--80:??", month = may, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3719295", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri May 16 07:16:02 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "High-Dimensional and Incomplete (HDI) tensors contain a wealth of knowledge and patterns, which are typically utilized to characterize complex relationships between entities in a variety of industrial applications.
Currently, the neural network-based \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "80", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Nguyen:2025:TSP, author = "Minh Duc Nguyen and Viet Cuong Ta", title = "Temporal Structural Preserving with Subtree Attention in Dynamic Graph Transformers", journal = j-TKDD, volume = "19", number = "4", pages = "81:1--81:??", month = may, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3720549", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri May 16 07:16:02 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Dynamic graph learning is a rapidly developing area of research due to its widespread application in various real-world networks. Most existing works combine graph neural networks and sequential models to exploit the graph topology and the temporal \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "81", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2025:FGB, author = "Yang Li and Di Wang and Jos{\'e} M. F. Moura", title = "Forecasting Graph-Based Time-Dependent Data with Graph Sequence Attention", journal = j-TKDD, volume = "19", number = "4", pages = "82:1--82:??", month = may, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3721435", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri May 16 07:16:02 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Forecasting graph-based, time-dependent data has broad practical applications but presents challenges. 
Effective models must capture both spatial and temporal dependencies in the data, while also incorporating auxiliary information to enhance prediction \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "82", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhao:2025:OMD, author = "Yaling Zhao and Lei Tang and Yunji Liang and Zeyu He and Junchi Ma", title = "Optimizing Matching for On-Demand Ride-Pooling with Stochastic Day-to-Day Dynamics", journal = j-TKDD, volume = "19", number = "4", pages = "83:1--83:??", month = may, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3721434", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri May 16 07:16:02 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Ride-pooling significantly reduces traffic congestion by enhancing fleet utilization through effective ride-matching. Real-world ride-pooling systems are dynamic, with fluctuations in driver availability and demand throughout the day. This necessitates \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "83", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Bai:2025:VVG, author = "Lin Bai and Caiyan Jia and Ziying Song and Chaoqun Cui", title = "{VGA}: Vision and Graph Fused Attention Network for Rumor Detection", journal = j-TKDD, volume = "19", number = "4", pages = "84:1--84:??", month = may, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3722225", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri May 16 07:16:02 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "With the development of social media, rumors have been spread broadly on social media platforms, causing great harm to society. Beside textual information, many rumors also use manipulated images or conceal textual information within images to deceive \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "84", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Shi:2025:DDE, author = "Hongyu Shi and Ling Chen and Xing Tang and Dandan Lyu", title = "{DHFM}: Diversity-Enhanced Hypergraph Factorization Machines for Feature Interaction Modeling", journal = j-TKDD, volume = "19", number = "4", pages = "85:1--85:??", month = may, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3721982", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri May 16 07:16:02 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Feature interaction modeling, which exploits interactive information between features, has been widely explored in various applications. Recently, many graph neural network (GNN)-based models are proposed to model feature interactions by predicting the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "85", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2025:MRN, author = "Yan Li and Zhulin Wang and Jing Liu and Lei Guo and Philippe Fournier-Viger and Youxi Wu and Xindong Wu", title = "Mining Repetitive Negative Sequential Patterns with Gap Constraints", journal = j-TKDD, volume = "19", number = "4", pages = "86:1--86:??", month = may, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3716390", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri May 16 07:16:02 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Sequential pattern mining (SPM) with gap constraints (or repetitive SPM or tandem repeat discovery in bioinformatics) can find frequent repetitive subsequences satisfying gap constraints, which are called positive sequential patterns with gap constraints \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "86", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2025:DBS, author = "Chen Liu and Shibo He and Shizhong Li and Zhenyu Shi and Wenchao Meng", title = "Detecting Both Seen and Unseen Anomalies in Time Series", journal = j-TKDD, volume = "19", number = "4", pages = "87:1--87:??", month = may, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3717071", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri May 16 07:16:02 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "A plethora of methods for time-series anomaly detection have surfaced recently, encompassing both supervised and unsupervised settings. However, few approaches are designed to accommodate both settings simultaneously. 
Moreover, methods tailored for \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "87", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2025:RLB, author = "Dongjie Li and Dong Li and Guang Lian", title = "Representation Learning Based on Ordinary Differential Equations for Dynamic Networks", journal = j-TKDD, volume = "19", number = "4", pages = "88:1--88:??", month = may, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3723359", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri May 16 07:16:02 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Representation learning on networks, mapping the network into a low-dimensional vector space, has received signification attention recently due to its widespread application in graph data mining tasks. With the success of representation learning in \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "88", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zong:2025:DRL, author = "Zefang Zong and Tao Feng and Jingwei Wang and Tong Xia and Yong Li", title = "Deep Reinforcement Learning for Demand-Driven Services in Logistics and Transportation Systems: a Survey", journal = j-TKDD, volume = "19", number = "4", pages = "89:1--89:??", month = may, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3708325", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri May 16 07:16:02 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Recent technology development brings the boom of numerous new Demand-Driven Services (DDS) into urban lives, including ridesharing, on-demand delivery, express systems, and warehousing. 
In DDS, a service loop is an elemental structure, including its \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "89", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yan:2025:OCE, author = "Hao Yan and Senzhang Wang and Chaozhuo Li and Jun Yin and Philip S. Yu and Jianxin Wang", title = "Have Our Cake and Eat It: Augmentation Diversity and Semantic Consistency Balanced Graph Contrastive Learning", journal = j-TKDD, volume = "19", number = "4", pages = "90:1--90:??", month = may, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3728646", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri May 16 07:16:02 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Self-supervised learning on graph neural networks is receiving increasing attention due to the difficulty of obtaining graph labels in many real applications. Graph contrastive learning (GCL), a recently popular method for self-supervised learning on \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "90", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chen:2025:CDN, author = "Fa-You Chen and Yun-Jui Hsu and Chia-Hsun Lu and Hong-Han Shuai and Lo-Yao Yeh and Chih-Ya Shen", title = "Compressing Deep Neural Networks with Goal-Specific Pruning and Self-Distillation", journal = j-TKDD, volume = "19", number = "4", pages = "91:1--91:??", month = may, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3721293", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri May 16 07:16:02 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Neural network (NN) compression aims at reducing the model size and receives much research attention. Nevertheless, we observe that when compressing convolutional neural networks (CNNs), previous approaches may not well measure the impact of filters to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "91", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2025:DSE, author = "Tianchun Wang and Dongsheng Luo and Wei Cheng and Haifeng Chen and Xiang Zhang", title = "{DyExplainer}: Self-explainable Dynamic Graph Neural Network with Sparse Attentions", journal = j-TKDD, volume = "19", number = "4", pages = "92:1--92:??", month = may, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3729173", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri May 16 07:16:02 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Graph Neural Networks (GNNs) resurge as a trending research subject owing to their impressive ability to capture representations from graph-structured data. 
However, the black-box nature of GNNs presents a significant challenge in terms of comprehending \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "92", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Pellizzoni:2025:FDC, author = "Paolo Pellizzoni and Andrea Pietracaprina and Geppino Pucci", title = "Fully Dynamic Clustering and Diversity Maximization in Doubling Metrics", journal = j-TKDD, volume = "19", number = "4", pages = "93:1--93:??", month = may, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3727881", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri May 16 07:16:02 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "We present approximation algorithms for some variants of $k$-center clustering and diversity maximization in a fully dynamic setting, where the active pointset evolves through arbitrary insertions and deletions. All algorithms employ a coreset-based \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "93", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2025:MST, author = "Zhanyu Liu and Guanjie Zheng and Yanwei Yu", title = "Multi-scale Traffic Pattern Bank for Cross-city Few-shot Traffic Forecasting", journal = j-TKDD, volume = "19", number = "4", pages = "94:1--94:??", month = may, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3727622", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri May 16 07:16:02 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Traffic forecasting is crucial for intelligent transportation systems (ITS), aiding in efficient resource allocation and effective traffic control.
However, its effectiveness often relies heavily on abundant traffic data, while many cities lack \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "94", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Xia:2025:NPS, author = "Hongbin Xia and Xiangzhong Meng and Yuan Liu", title = "Non-Parallel Story Author-Style Transfer with Disentangled Representation Learning", journal = j-TKDD, volume = "19", number = "4", pages = "95:1--95:??", month = may, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3726870", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri May 16 07:16:02 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Non-parallel story author-style transfer is an important but challenging task in natural language process, which requires transferring an input story into another author-style while maintaining source semantics. Despite recent progress, current text \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "95", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chen:2025:PPM, author = "Congcong Chen and Lifei Wei and Jintao Xie and Yang Shi", title = "Privacy-Preserving Machine Learning Based on Cryptography: a Survey", journal = j-TKDD, volume = "19", number = "4", pages = "96:1--96:??", month = may, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3729234", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Fri May 16 07:16:02 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Machine learning has profoundly influenced various aspects of our lives. 
However, privacy breaches have caused significant unease and concern among the general public. Preserving the privacy of sensitive data during the training and inference phases of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "96", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Micale:2025:MSM, author = "Giovanni Micale and Antonio {Di Maria} and Roberto Grasso and Vincenzo Bonnici and Alfredo Ferro and Dennis Shasha and Rosalba Giugno and Alfredo Pulvirenti", title = "{MultiGraphMatch}: a Subgraph Matching Algorithm for Multigraphs", journal = j-TKDD, volume = "19", number = "5", pages = "97:1--97:??", month = jun, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3728361", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jul 26 08:16:25 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Subgraph matching is the problem of finding all the occurrences of a small graph, called the query, in a larger graph, called the target. Although the problem has been widely studied in simple graphs, few solutions have been proposed for multigraphs, in \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "97", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Mozhdehi:2025:SSM, author = "Arash Mozhdehi and Yunli Wang and Sun Sun and Xin Wang", title = "{SED2AM}: Solving Multi-Trip Time-Dependent Vehicle Routing Problem Using Deep Reinforcement Learning", journal = j-TKDD, volume = "19", number = "5", pages = "98:1--98:??", month = jun, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3721983", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jul 26 08:16:25 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Deep Reinforcement Learning (DRL)-based frameworks, featuring Transformer-style policy networks, have demonstrated their efficacy across various Vehicle Routing Problem (VRP) variants. However, the application of these methods to the Multi-Trip Time- \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "98", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Lu:2025:BSB, author = "Moli Lu and Linhao Luo and Xiaofeng Zhang", title = "Beyond Static Boundaries: Unraveling Temporal Overlapping Communities with Information Bottleneck Guidance", journal = j-TKDD, volume = "19", number = "5", pages = "99:1--99:??", month = jun, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3716391", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jul 26 08:16:25 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Community detection has gained significant research interest within the data mining field. It involves identifying subsets of nodes with dense internal connections and sparse external connections.
Most studies on community detection focus solely on \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "99", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Hossain:2025:PMF, author = "Mst Shamima Hossain and Christos Faloutsos and Boris Baer and Hyoseung Kim and Vassilis J. Tsotras", title = "Principled Mining, Forecasting, and Monitoring of Honeybee Time Series with {EBV+}", journal = j-TKDD, volume = "19", number = "5", pages = "100:1--100:??", month = jun, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3719014", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jul 26 08:16:25 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Honeybees, as natural crop pollinators, play a significant role in biodiversity and food production for human civilization. Bees actively regulate hive temperature (homeostasis) to maintain a colony's proper functionality. Deviations from usual \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "100", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Xu:2025:LRS, author = "Hongjia Xu and Liangliang Zhang and Yao Ma and Sheng Zhou and Zhuonan Zheng and Jiajun Bu", title = "Learning to Reduce the Scale of Large Graphs: a Comprehensive Survey", journal = j-TKDD, volume = "19", number = "5", pages = "101:1--101:??", month = jun, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3729427", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jul 26 08:16:25 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Graph data, prevalent across domains like social networks, biological systems, and recommendation systems, presents significant challenges due to its large scale and complex structure. The advent of Graph Neural Networks (GNNs) has revolutionized graph \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "101", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Sang:2025:MGI, author = "Chun-Yan Sang and Ming Gong and Shi-Gen Liao and Wei Zhou", title = "{MA-GCL4SR}: Improving Graph Contrastive Learning-Based Sequential Recommendation with Model Augmentation", journal = j-TKDD, volume = "19", number = "5", pages = "102:1--102:??", month = jun, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3722561", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jul 26 08:16:25 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Sequential recommendation (SR) has leveraged the advantages of graph contrastive learning (GCL) to enhance the representation of SR, which mitigates to some extent the constraint of scarce labeled data for supervision in SR. 
Existing work applies general \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "102", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Xu:2025:DDL, author = "Liming Xu and Yongheng Wang and Chunlin He and Quan Tang and Xianhua Zeng and Jiancheng Lv", title = "Deep Disease Label-guided Graph Convolutional Network for Medical Report Generation", journal = j-TKDD, volume = "19", number = "5", pages = "103:1--103:??", month = jun, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3722226", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jul 26 08:16:25 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Medical report generation which extracts pathological information within medical images and subsequently produces diagnostic text autonomously aims to alleviate the workload of medical experts and offers auxiliary support in diagnoses. Despite some \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "103", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Jirachanchaisiri:2025:CCD, author = "Pongsakorn Jirachanchaisiri and Saranya Maneeroj and Atsuhiro Takasu", title = "{COLANet}: Cross-Domain Recommender Systems with Latent Overlapping Items on Graph Neural Networks", journal = j-TKDD, volume = "19", number = "5", pages = "104:1--104:??", month = jun, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3730404", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jul 26 08:16:25 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Cross-domain recommender systems (CDRSs) enhance recommendations by transferring knowledge of overlapping users across two domains. 
Deep canonical correlation analysis (DCCA) shows promising results in CDRSs by maximizing correlations between \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "104", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Xu:2025:TPL, author = "Lanling Xu and Junjie Zhang and Bingqian Li and Jinpeng Wang and Sheng Chen and Wayne Xin Zhao and Ji-Rong Wen", title = "Tapping the Potential of Large Language Models as Recommender Systems: a Comprehensive Framework and Empirical Analysis", journal = j-TKDD, volume = "19", number = "5", pages = "105:1--105:??", month = jun, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3726871", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jul 26 08:16:25 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Recently, Large Language Models (LLMs) such as ChatGPT have showcased remarkable abilities in solving general tasks, demonstrating the potential for applications in recommender systems. To assess how effectively LLMs can be used in recommendation tasks, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "105", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2025:RNL, author = "Fenglin Liu and Xuancheng Ren and Guangxiang Zhao and Chenyu You and Sherry Ma and Xian Wu and Wei Fan and Xu Sun", title = "Rethinking Natural Language Generation with Layer-Wise Multi-View Decoding", journal = j-TKDD, volume = "19", number = "5", pages = "106:1--106:??", month = jun, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3729536", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jul 26 08:16:25 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "In natural language generation, language models, particularly those based on decoder-only architectures as in popular Large Language Models (LLMs), have demonstrated impressive performance across a wide range of tasks. However, encoder-decoder \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "106", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zheng:2025:RCL, author = "Xiulin Zheng and Peipei Li and Zan Zhang and Jia Wu and Xindong Wu", title = "A Relation-Constraint Link Prediction Model for Dynamic Knowledge Graphs with Entity Drift", journal = j-TKDD, volume = "19", number = "5", pages = "107:1--107:??", month = jun, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3725815", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jul 26 08:16:25 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Knowledge Graphs (KGs) often suffer from incompleteness and this issue motivates the task of Knowledge Graph Completion (KGC). 
Traditional KGC models mainly concentrate on static KGs with a fixed set of entities and relations, or dynamic KGs with \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "107", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2025:FBH, author = "Dong Li and Aijia Zhang and Huan Xiong and Biqing Qi and Junqi Gao", title = "{FDphormer}: Beyond Homophily with Feature-Difference Position Encoding", journal = j-TKDD, volume = "19", number = "5", pages = "108:1--108:??", month = jun, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3727882", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jul 26 08:16:25 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Graph Transformers have garnered significant attention due to their ability to address the challenges of long-distance interactions in previous GNNs. However, most current graph Transformers face difficulties when dealing with heterophilic graphs. To \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "108", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Hu:2025:CCD, author = "Lianyu Hu and Mudi Jiang and Yan Liu and Quan Zou and Zengyou He", title = "Clustering Categorical Data via Multiple Hypothesis Testing", journal = j-TKDD, volume = "19", number = "5", pages = "109:1--109:??", month = jun, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3735977", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jul 26 08:16:25 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Categorical data clustering is a fundamental data mining problem, which has been extensively studied during the past decades. 
To date, many effective clustering algorithms for categorical data are available in the literature. However, almost all existing \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "109", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Gupta:2025:CSG, author = "Shubham Gupta and Suman Kundu", title = "Communities in Streaming Graphs: Small Space Data Structure, Benchmark Data Generation, and Linear Algorithm", journal = j-TKDD, volume = "19", number = "5", pages = "110:1--110:??", month = jun, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3735976", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jul 26 08:16:25 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Identifying and preserving community structures in a streaming graph is a very challenging task. However, many applications require the identification of these communities in very limited space and time. In this article, we design Community Sketch, a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "110", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ai:2025:IAR, author = "Yuyan Ai and Chaoqun Li and Liangxiao Jiang", title = "Intent-aware Recommendation Based on Principal Component Analysis", journal = j-TKDD, volume = "19", number = "5", pages = "111:1--111:??", month = jun, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3731761", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jul 26 08:16:25 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "In recommender systems, exploring user intents allows for a better understanding and exploration of user preferences, thereby improving recommendation performance. However, existing methods for modeling user intents often do so by statically setting the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "111", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Alfas:2025:EFE, author = "Muhammad Alfas and Manoj Kumar and Shaurya Shriyam and Sandeep Kumar", title = "An Efficient Framework for Epidemiological Parameter Estimation via Graph Reduction and Graph Neural Networks", journal = j-TKDD, volume = "19", number = "6", pages = "112:1--112:??", month = jul, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3736727", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jul 26 08:16:27 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "We propose an epidemiological parameter estimation framework based on contact networks and graph neural networks (GNNs). Contact network-based epidemiological models allow us to capture heterogeneity and individual-level details more effectively. 
\ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "112", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Jin:2025:SSA, author = "Yufei Jin and Xingquan Zhu", title = "A Systematic Study and Analysis of Graph Neural Networks under Noise", journal = j-TKDD, volume = "19", number = "6", pages = "113:1--113:??", month = jul, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3733605", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jul 26 08:16:27 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Graph Neural Networks (GNNs) have shown superb performance in handling networked data, mainly attributed to their message passing and convolution process across neighbors. For most literature, the performance of GNNs is mainly reported based on noise- \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "113", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Qiu:2025:OLN, author = "Jinjie Qiu and Shengda Zhuo and Philip S. Yu and Changdong Wang and Shuqiang Huang", title = "Online Learning for Noisy Labeled Streams", journal = j-TKDD, volume = "19", number = "6", pages = "114:1--114:??", month = jul, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3734875", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jul 26 08:16:27 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Online learning, characterized by its feature space's adaptability over time, has emerged as a flexible learning paradigm that has attracted widespread attention. 
However, existing online learning methods often overlook the distributional differences \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "114", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Guan:2025:MLS, author = "Yue Guan and Yumei He and Ni Huang and Xunhua Guo and Guoqing Chen", title = "Mining Linguistic Styles in Bilateral Matching: a Contrastive Learning Approach to Reciprocal Recommendation", journal = j-TKDD, volume = "19", number = "6", pages = "115:1--115:??", month = jul, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3736418", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jul 26 08:16:27 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Reciprocal recommendation systems are crucial for online dating platforms to provide quality matches and reduce choice overload. However, the design of reciprocal recommendation systems grapples with the challenges of estimating interpersonal \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "115", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Feng:2025:DCS, author = "Jiadong Feng and Wei Li and Suhuang Wu and Zhao Wei and Yong Xu and Juhong Wang and Hui Li", title = "Deep Code Search with Naming-Agnostic Contrastive Multi-View Learning", journal = j-TKDD, volume = "19", number = "6", pages = "116:1--116:??", month = jul, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3737878", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jul 26 08:16:27 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Software development is a repetitive task, as developers usually reuse or get inspiration from existing implementations. Code search, which refers to the retrieval of relevant code snippets from a codebase according to the developer's intent that has \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "116", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2025:EED, author = "Zimu Wang and Jiashuo Liu and Hao Zou and Xingxuan Zhang and Yue He and Dongxu Liang and Peng Cui", title = "Exploring and Exploiting Data Heterogeneity in Recommendation", journal = j-TKDD, volume = "19", number = "6", pages = "117:1--117:??", month = jul, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3737290", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jul 26 08:16:27 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Massive amounts of data are the foundation of data-driven recommendation models. As an inherent nature of big data, data heterogeneity widely exists in real-world recommendation systems. It reflects the differences in the properties among sub- 
\ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "117", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Guan:2025:FPF, author = "Wei Guan and Jian Cao and Haiyan Zhao and Yang Gu and Shiyou Qian", title = "{FeadSeq}: a Personalized Federated Anomaly Detection Framework for Discrete Event Sequences", journal = j-TKDD, volume = "19", number = "6", pages = "118:1--118:??", month = jul, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3742896", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jul 26 08:16:27 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Event sequence anomaly detection has garnered considerable attention in research, encompassing applications such as identifying anomalies in system logs, anomalous transaction users, and so on. Yet, prevailing anomaly detection methods often rely solely \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "118", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2025:RDE, author = "Zhaodu Zhang and Yue Chao and Xuejun Ma", title = "Robust Distributed Estimation for Modal Regression under Least Squares Approximation", journal = j-TKDD, volume = "19", number = "6", pages = "119:1--119:??", month = jul, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3742477", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jul 26 08:16:27 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Massive datasets pose a serious challenge to traditional statistical methods. Modal regression has greater robustness and high inference efficiency compared to mean regression and likelihood-based methods. 
In this article, we present a robust distributed \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "119", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2025:DKG, author = "Rui Wang and Yanan Wang and Ziang Li and Haitao Cheng and Guozi Sun", title = "Distributed Keyword-guided Topic Model with Lexical Knowledge Supervision", journal = j-TKDD, volume = "19", number = "6", pages = "120:1--120:??", month = jul, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3737881", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jul 26 08:16:27 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Topic models are often used to discover latent semantic patterns from document collections. However, existing unsupervised approaches have the following drawbacks: (1) The mined topics may not match user interests; (2) They are prone to extract \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "120", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Subagdja:2025:DNB, author = "Budhitama Subagdja and D. Shanthoshigaa and Ah-Hwee Tan", title = "{DisambiguART}: a Neural-based Inference Model for Knowledge Graph Disambiguation", journal = j-TKDD, volume = "19", number = "6", pages = "121:1--121:??", month = jul, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3737880", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jul 26 08:16:27 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "One main challenge in constructing a Knowledge Graph (KG) is to deal with ambiguity. 
Specifically, an entity in the graph can be assigned with multiple meanings while two or more entities considered to have different meanings may actually be the same. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "121", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ting:2025:WAN, author = "Kai Ming Ting and Zhong Zhuang and Guansong Pang and Zongyou Liu and Tianrun Liang and Qiuran Zhao", title = "What Are Anomalies in a Network?", journal = j-TKDD, volume = "19", number = "6", pages = "122:1--122:??", month = jul, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3723007", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jul 26 08:16:27 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "This article examines a collection of assumptions used in the current literature on node anomaly detection in a network. The examination raises the question: What are anomalies in a network? Our attempt to answer this question has provided some \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "122", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Potin:2025:PBG, author = "Lucas Potin and Rosa Figueiredo and Vincent Labatut and Christine Largeron", title = "Pattern-Based Graph Classification: Comparison of Quality Measures and Importance of Preprocessing", journal = j-TKDD, volume = "19", number = "6", pages = "123:1--123:??", month = jul, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3743143", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jul 26 08:16:27 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Graph classification aims to categorize graphs based on their structural and attribute features, with applications in diverse fields such as social network analysis and bioinformatics. Among the methods proposed to solve this task, those relying on \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "123", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2025:SFL, author = "Shangyang Li and Jiayan Guo", title = "Subgraph Federated Learning with Information Bottleneck Constrained Generative Learning", journal = j-TKDD, volume = "19", number = "6", pages = "124:1--124:??", month = jul, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3737879", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jul 26 08:16:27 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Federated Learning (FL) is a groundbreaking approach that enables multiple clients to jointly train deep learning models by pooling their data, while addressing privacy and bandwidth issues that prevent direct data sharing. 
This approach is particularly \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "124", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Villaizan-Vallelado:2025:DMT, author = "Mario Villaiz{\'a}n-Vallelado and Matteo Salvatori and Carlos Segura and Ioannis Arapakis", title = "Diffusion Models for Tabular Data Imputation and Synthetic Data Generation", journal = j-TKDD, volume = "19", number = "6", pages = "125:1--125:32", month = jul, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3742435", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Sat Jul 26 08:16:27 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", note = "See corrigendum \cite{Villaizan-Vallelado:2025:CDM}.", abstract = "Data imputation and data generation have important applications across many domains where incomplete or missing data can hinder accurate analysis and decision-making. Diffusion models have emerged as powerful generative models capable of capturing \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "125", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Huang:2025:TSU, author = "Gengsen Huang and Wensheng Gan and Philip S. Yu", title = "Towards Sequence Utility Maximization under Utility Occupancy Measure", journal = j-TKDD, volume = "19", number = "7", pages = "126:1--126:27", month = aug, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3744344", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 2 10:54:17 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "The discovery of utility-driven patterns is a valuable and difficult research topic. 
It can extract significant and interesting information from specific and varied databases, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "126", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2025:LRL, author = "Shichao Zhang and Penghui Xi and Mengqi Jiang and Guixian Zhang and Debo Cheng", title = "Latent Representation Learning for Attributed Graph Anomaly Detection", journal = j-TKDD, volume = "19", number = "7", pages = "127:1--127:22", month = aug, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3733604", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 2 10:54:17 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Anomaly detection in attributed graph data has been widely applied in real applications. However, the intricate topology of graph data, high-dimensional attributes, and class \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "127", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Alptekin:2025:HSD, author = "Ece Alptekin and Berkay Kemal Balioglu and M. Emre Gursoy", title = "Hierarchical Spatial Decompositions under Local Differential Privacy", journal = j-TKDD, volume = "19", number = "7", pages = "128:1--128:37", month = aug, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3744569", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 2 10:54:17 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "The popularity of smartphones, GPS-enabled devices, social networks, and connected vehicles all contribute to the increasing volume of spatial data. 
Spatial decompositions assist in \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "128", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Qian:2025:URD, author = "Fulan Qian and Wenbin Chen and Hai Chen and Yan Cui and Shu Zhao and Yanping Zhang", title = "Understanding the Robustness of Deep Recommendation under Adversarial Attacks", journal = j-TKDD, volume = "19", number = "7", pages = "129:1--129:46", month = aug, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3744570", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 2 10:54:17 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "It has been shown that deep recommendation models are susceptible to adversarial attacks, with this vulnerability potentially leading to significant economic losses in the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "129", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhou:2025:MMA, author = "Jianping Zhou and Bin Lu and Zhanyu Liu and Siyu Pan and Xuejun Feng and Hua Wei and Guanjie Zheng and Xinbing Wang and Chenghu Zhou", title = "{MagiNet}: Mask-Aware Graph Imputation Network for Incomplete Traffic Data", journal = j-TKDD, volume = "19", number = "7", pages = "130:1--130:20", month = aug, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3743141", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 2 10:54:17 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Due to detector malfunctions and communication failures, missing data is ubiquitous during the collection of traffic data. 
Therefore, it is of vital importance to impute the missing \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "130", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Tao:2025:CLS, author = "Zhen Tao and Dinghao Xi and Zhiyu Li and Liumin Tang and Wei Xu", title = "{CAT-LLM}: Style-enhanced Large Language Models with Text Style Definition for {Chinese} Article-style Transfer", journal = j-TKDD, volume = "19", number = "7", pages = "131:1--131:33", month = aug, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3744250", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 2 10:54:17 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Text style transfer plays a vital role in online entertainment and social media. However, existing models struggle to handle the complexity of Chinese long texts, such as rhetoric, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "131", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zong:2025:CGF, author = "Haoran Zong and Xiao Zhang and Ruichen Li and Jianhui Duan and Derun Zou and Wenzhong Li", title = "Convergence-Guaranteed Federated Learning through Gradient Trajectory Smoothing with Triple-Objective Decomposition", journal = j-TKDD, volume = "19", number = "7", pages = "132:1--132:31", month = aug, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3743142", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 2 10:54:17 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Federated Learning (FL) has been widely adopted as a distributed machine learning paradigm aiming to derive a global model without transferring local data to the server. In \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "132", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chen:2025:TRG, author = "Jian Chen and Yile Chen and Zeyi Wen and Yawen Chen and Jin Huang", title = "Towards Recommendation on Good Quality Data Science Solutions", journal = j-TKDD, volume = "19", number = "7", pages = "133:1--133:19", month = aug, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3746235", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 2 10:54:17 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Data science aims to solve real-world problems with the knowledge derived from data. Successfully tackling a data science problem requires practitioners to choose an \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "133", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Sun:2025:UGW, author = "Haifeng Sun and Yuanyi Wang and Han Li and Wei Tang and Zirui Zhuang and Qi Qi and Jingyu Wang", title = "Understanding and Guiding Weakly Supervised Entity Alignment with Potential Isomorphism Propagation", journal = j-TKDD, volume = "19", number = "7", pages = "134:1--134:28", month = aug, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3742436", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 2 10:54:17 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Weakly Supervised Entity Alignment (EA) is the task of identifying equivalent entities across diverse knowledge graphs (KGs) using only a limited number of seed alignments. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "134", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Bu:2025:KGF, author = "Xiya Bu and Yu Liu", title = "Knowledge Graph Fine-grained Modeling Network with Contrastive Learning for Recommendation", journal = j-TKDD, volume = "19", number = "7", pages = "135:1--135:18", month = aug, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3744926", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 2 10:54:17 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Knowledge graph (KG) is often introduced into recommendation systems because of its large amount of edge information. The method based on graph neural networks (GNNs) has \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "135", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chinpattanakarn:2025:FVL, author = "Naaek Chinpattanakarn and Chainarong Amornbunchornvej", title = "Framework for Variable-Lag Motif Following Relation Inference in Time Series Using Matrix Profile Analysis", journal = j-TKDD, volume = "19", number = "7", pages = "136:1--136:24", month = aug, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3744652", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 2 10:54:17 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Knowing who follows whom and what patterns they are following are crucial steps to understand collective behaviors (e.g., a group of human, a school of fish, or a stock market). Time \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "136", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2025:PPP, author = "Bohao Li and Bowen Du and Junchen Ye", title = "{PRIME}: Pretraining for Patient Condition Representation with Irregular Multimodal Electronic Health Records", journal = j-TKDD, volume = "19", number = "7", pages = "137:1--137:39", month = aug, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3744251", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 2 10:54:17 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "With the increasing collection of electronic health records (EHRs), deep learning has become a crucial tool for real-time treatment analysis. However, due to patient privacy \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "137", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2025:CML, author = "Huiting Liu and Wei Zhang and Peipei Li and Peng Zhao and Xindong Wu", title = "Causal Meta-learning with Multi-view Graphs for Cold-start Recommendation", journal = j-TKDD, volume = "19", number = "7", pages = "138:1--138:29", month = aug, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3732943", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 2 10:54:17 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Cold-start recommendation is a well-known problem in practical application scenarios. Generating reliable recommendations can be challenging when interactions are \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "138", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chakraborty:2025:WSO, author = "Mohna Chakraborty and Adithya Kulkarni and Qi Li", title = "Weakly Supervised Open-Domain Aspect-Based Sentiment Analysis", journal = j-TKDD, volume = "19", number = "7", pages = "139:1--139:29", month = aug, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3747849", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 2 10:54:17 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Aspect-Based Sentiment Analysis (ABSA) comprises several subtasks: aspect term extraction (ATE), opinion term extraction (OTE), aspect term sentiment extraction \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "139", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yang:2025:STD, author = "Hanchen Yang and Jiannong Cao and Wengen Li and Shuyu Wang and Hui Li and Jihong Guan and Shuigeng Zhou", title = "Spatial-Temporal Data Mining for Ocean Science: Data, Methodologies and Opportunities", journal = j-TKDD, volume = "19", number = "7", pages = "140:1--140:47", month = aug, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3748259", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 2 10:54:17 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "With the rapid amassing of spatial-temporal (ST) ocean data, many spatial-temporal data mining (STDM) studies have been conducted to address various oceanic issues, including \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "140", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2025:GSA, author = "Hao Liu and Dong Li and Bing Zeng and Wei Liang and Dongjie Li", title = "Graph Self-attention Mechanism for Interpretable Multi-hop Knowledge Graph Link Prediction", journal = j-TKDD, volume = "19", number = "7", pages = "141:1--141:22", month = aug, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3737702", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 2 10:54:17 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Knowledge Graphs (KGs) are extensively used in recommendation systems and information retrieval but often suffer from incompleteness. A popular solution to this problem is \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "141", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhao:2025:IFL, author = "Xingyu Zhao and Yuexuan An and Ning Xu and Lei Qi and Xin Geng", title = "Interactive Fusion Label Enhancement for Multi-Label Learning", journal = j-TKDD, volume = "19", number = "7", pages = "142:1--142:23", month = aug, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3744571", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 2 10:54:17 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Multi-Label Learning (MLL) involves the task of assigning a set of relevant labels to a given instance. Recently, Label Enhancement (LE) has gained significant attention in various \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "142", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2025:RRA, author = "Jiaqi Wang and Wengen Li and Yulou Shu and Jihong Guan and Yichao Zhang and Shuigeng Zhou", title = "Raker: a Relation-Aware Knowledge Reasoning Model for Inductive Relation Prediction", journal = j-TKDD, volume = "19", number = "7", pages = "143:1--143:20", month = aug, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3745029", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 2 10:54:17 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Inductive relation prediction, an important task for knowledge graph completion, is to predict the relations between entities that are unseen at the training stage. The latest methods use \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "143", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yerbury:2025:URV, author = "Luke W. Yerbury and Ricardo J. G. B. Campello and G. C. {Livingston, Jr.} and Mark Goldsworthy and Lachlan O'Neil", title = "On the Use of Relative Validity Indices for Comparing Clustering Approaches", journal = j-TKDD, volume = "19", number = "8", pages = "144:1--144:53", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3748726", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 2 10:54:18 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Relative Validity Indices (RVIs) such as the Silhouette Width Criterion, Calinski-Harabasz and Davies-Bouldin indices are the most widely used tools for evaluating and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "144", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Jiang:2025:UUP, author = "Lu Jiang and Ruilou Zhang and Yanan Xiao and Kunpeng Liu and Kaidi Wang and Minghao Yin", title = "Understanding User Perspectives for {MOOC} Quality Evaluation with Hypergraph Learning", journal = j-TKDD, volume = "19", number = "8", pages = "145:1--145:19", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3749845", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 2 10:54:18 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Evaluation of Massive Open Online Course (MOOC) quality is crucial to enhance the educational resources, benefiting user services, and enhancing students' learning \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "145", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2025:EFL, author = "Ji Liu and Beichen Ma and Qiaolin Yu and Ruoming Jin and Jingbo Zhou and Yang Zhou and Huaiyu Dai and Haixun Wang and Dejing Dou and Patrick Valduriez", title = "Efficient Federated Learning with Heterogeneous Data and Adaptive Dropout", journal = j-TKDD, volume = "19", number = "8", pages = "146:1--146:31", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3749376", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 2 10:54:18 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Federated Learning (FL) is a promising distributed machine learning approach that enables collaborative training of a global model using multiple edge devices. The data \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "146", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhao:2025:SGS, author = "Jie Zhao and Chao Chen and Wanyi Zhang and Mingyu Deng and Huayan Pu and Jun Luo", title = "{SE-GCL}: a Semantic-Enhanced Graph Contrastive Learning Framework for Road Network Embedding", journal = j-TKDD, volume = "19", number = "8", pages = "147:1--147:27", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3757921", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 2 10:54:18 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Representation learning of road networks is essential for various downstream traffic-related tasks, as road network contain multi-modal data with rich information, and the learned \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "147", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2025:PMD, author = "Qian Li and Wenhao Zhang and Bojian Hu and Tun Li and Rong Wang and Shihong Wei and Yunpeng Xiao", title = "A Propagation Model of Derived Topic Based on Cognitive Accumulation and Transfer Learning", journal = j-TKDD, volume = "19", number = "8", pages = "148:1--148:24", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3747187", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 2 10:54:18 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "The propagation of hot topics often gives rise to a series of derivative topics. In view of the sparsity of user behavior data and the cognitive accumulation of the original topic, a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "148", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2025:GGT, author = "Qi Zhang and Mengmeng Si and Yanfeng Sun and Shaofan Wang and Junbin Gao and Baocai Yin", title = "{GFformer}: a Graph Transformer for Extracting All Frequency Information from Large-scale Graphs", journal = j-TKDD, volume = "19", number = "8", pages = "149:1--149:20", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3750051", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 2 10:54:18 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Graph Transformers have demonstrated outstanding performance across various graph-based applications. Despite their success, applying them to large-scale graphs \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "149", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhuo:2025:OLM, author = "Shengda Zhuo and Di Wu and Yi He and Shuqiang Huang and Xindong Wu", title = "Online Learning from Mix-typed, Drifted, and Incomplete Streaming Features", journal = j-TKDD, volume = "19", number = "8", pages = "150:1--150:28", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3744712", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 2 10:54:18 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Online learning, where feature spaces can change over time, offers a flexible learning paradigm that has attracted considerable attention. However, it still faces three \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "150", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2025:TAM, author = "Jingyuan Zhang and Lei Yu and Zhirong Huang and Li Yang and Fengjun Zhang", title = "Topology Augmented Multi-Band and Multi-Scale Filtering for Graph Anomaly Detection", journal = j-TKDD, volume = "19", number = "8", pages = "151:1--151:27", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3748727", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 2 10:54:18 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Graph Anomaly Detection (GAD) has gained significant attention in areas such as financial risk control and social network security, becoming a critical research problem. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "151", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2025:IHG, author = "Xuchao Li and Peng Zhang and Ru Ma and Chenghang Huo and Fuzhi Zhang", title = "Integrating Heterogeneous Graph Attention Network with Label Propagation for Detecting Spammer Groups on E-Commerce Platforms", journal = j-TKDD, volume = "19", number = "8", pages = "152:1--152:28", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3749846", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 2 10:54:18 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "The collusive fraudulent behaviors on e-commerce platforms lead to proliferation of fraudulent reviews, which disrupt fair competition among merchants and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "152", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Fu:2025:MGS, author = "Ningning Fu and Shengheng Liu and Weiliang Xie and Yongming Huang", title = "Multi-Grained Spatial-Temporal Feature Complementarity for Accurate Online Cellular Traffic Prediction", journal = j-TKDD, volume = "19", number = "8", pages = "153:1--153:27", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3758099", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 2 10:54:18 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Knowledge discovered from telecom data can facilitate proactive understanding of network dynamics and user behaviors, which in turn empowers service providers to optimize \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "153", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ma:2025:GSF, author = "Fei Ma and Ping Wang", title = "Growth Scale-Free Networks by Various Generative Ways", journal = j-TKDD, volume = "19", number = "8", pages = "154:1--154:32", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3748512", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 2 10:54:18 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "In this article, the popularly discussed topic, i.e., how to construct available theoretical networked models that certainly capture some structural features popularly observed on realistic \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "154", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Shen:2025:MLC, author = "Xiaoxuan Shen and Fenghua Yu and Qian Wan and Ruxia Liang and Jianwen Sun", title = "Multi-level Contrastive Learning for Knowledge Tracing", journal = j-TKDD, volume = "19", number = "8", pages = "155:1--155:29", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3759920", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 2 10:54:18 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Knowledge Tracing (KT) is the task of predicting students' future performance based on their past interactions with educational resources. A key aspect of KT is representation learning, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "155", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Fitzpatrick:2025:NAA, author = "Padraig Fitzpatrick and Anna Jurek-Loughrey and Pawe{\l} D{\l}otko", title = "New Automated Approach to Selection of Mapper Clustering Parameters", journal = j-TKDD, volume = "19", number = "8", pages = "156:1--156:36", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3746065", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 2 10:54:18 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Topological methods have recently gained traction as powerful tools for extracting insights from high-dimensional data, forming the foundation of an approach known as \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "156", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Lazri:2025:BFA, author = "Zachary McBride Lazri and Danial Dervovic and Antigoni Polychroniadou and Ivan Brugere and Dana Dachman-Soled and Furong Huang and Min Wu", title = "Balancing Fairness and Accuracy in Data-Restricted Binary Classification", journal = j-TKDD, volume = "19", number = "8", pages = "157:1--157:40", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3747850", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 2 10:54:18 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Fair decision-making in Machine Learning (ML) remains a critical challenge, particularly when access to sensitive information is restricted due to legal, ethical, or organizational \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "157", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Jian:2025:AMI, author = "Yue Jian and Miao Zhang and Ziyue Qin and Chuyuan Xie and Kui Xiao and Yan Zhang and Zhifei Li", title = "Adaptive Modality Interaction Transformer for Multimodal Knowledge Graph Completion", journal = j-TKDD, volume = "19", number = "8", pages = "158:1--158:24", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3760786", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 2 10:54:18 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Knowledge graphs (KGs) are frequently confronted with the challenge of incompleteness, a problem that extends to multimodal knowledge graphs (MKGs). The primary goal of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "158", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2025:MMA, author = "Chengsen Wang and Qi Qi and Jinming Wu and Haifeng Sun and Zirui Zhuang and Yuhan Jing and Lianyuan Li and Jingyu Wang", title = "{MCAKE}: Memory-Augmented Autoencoder with Contrastive Learning for Unsupervised Anomaly Detection", journal = j-TKDD, volume = "19", number = "8", pages = "159:1--159:18", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3759460", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 2 10:54:18 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Recently, reconstruction-based deep models have gained widespread usage in unsupervised anomaly detection. However, they may overlook some anomalies owing to the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "159", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Kou:2025:OVI, author = "Yannian Kou and Qiuqiang Lin and Yunhao Wen and Di Fan and Chuanhou Gao", title = "{ORIC V2}: Improved Feature Interaction Detection Model through Online Random Interaction Chains for Click-Through Rate Prediction", journal = j-TKDD, volume = "19", number = "8", pages = "160:1--160:26", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3762667", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 2 10:54:18 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Predicting the probability that a user clicks a specific item is fundamental in online advertising and recommendation. Further, it is crucial to use the latest and historical data \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "160", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Tang:2025:CNI, author = "Yingxia Tang and Yanxuan Wei and Yupeng Hu and Xiangwei Zheng and Cun Ji", title = "Convolutional Network Integrated with Frequency Adaptive Learning for Multivariate Time Series Classification", journal = j-TKDD, volume = "19", number = "8", pages = "161:1--161:23", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3761818", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 2 10:54:18 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Multivariate time series classification (MTSC) is a significant research topic in the realm of data mining, with broad applications in different industries, including healthcare, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "161", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Villaizan-Vallelado:2025:CDM, author = "Mario Villaiz{\'a}n-Vallelado and Matteo Salvatori and Carlos Segura and Ioannis Arapakis", title = "Corrigendum: Diffusion Models for Tabular Data Imputation and Synthetic Data Generation", journal = j-TKDD, volume = "19", number = "8", pages = "C1:1--C1:??", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3761939", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Oct 2 10:54:18 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", note = "See \cite{Villaizan-Vallelado:2025:DMT}.", abstract = "This is a corrigendum for the article ``Diffusion Models for Tabular Data Imputation and Synthetic Data Generation'' published in ACM Trans. Knowl. Discov. Data 19(6): \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "C1", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Berahmand:2025:REB, author = "Kamal Berahmand and Mehrnoush Mohammadi and Razieh Sheikhpour and Mahdi Jalili and Richi Nayak and Hassan Khosravi", title = "Relative Entropy-based Regularized Non-negative Matrix Factorization for Attributed Graph Clustering", journal = j-TKDD, volume = "19", number = "9", pages = "162:1--162:28", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3765742", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Dec 8 10:20:03 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Attributed graph clustering is a fundamental task in network mining, essential for uncovering valuable insights in various applications. 
However, the heterogeneity of information from structural and attribute spaces poses significant challenges in \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "162", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Xiong:2025:PPN, author = "Guangzhi Xiong and Sanchit Sinha and Aidong Zhang", title = "{ProtoNAM}: Prototypical Neural Additive Models for Interpretable Deep Tabular Learning", journal = j-TKDD, volume = "19", number = "9", pages = "163:1--163:25", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3766072", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Dec 8 10:20:03 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Generalized Additive Models (GAMs) have long been a powerful white-box tool for the intelligible analysis of tabular data, revealing the influence of each feature on the model predictions. Despite the success of Neural Networks (NNs) in various domains, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "163", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Kong:2025:DVA, author = "Xiangjie Kong and Siyue Shuai and Hui Wang and Guojiang Shen and Feng Xia", title = "Dual-View Anomaly Detection in Heterogeneous Information Networks with Hierarchical Neighborhood Fusion", journal = j-TKDD, volume = "19", number = "9", pages = "164:1--164:24", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3767156", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Dec 8 10:20:03 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "The primary objective of graph node anomaly detection is to pinpoint rare patterns that display marked deviations from the typical one. Existing methods utilize Graph Convolutional Networks (GCNs) to model complex interactions in Heterogeneous \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "164", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Tu:2025:WOI, author = "Cheng Tu and Yunshan Ma and Yang Li and Min Zhang and Miao Hu and Fan Shi and Xiang Wang", title = "{Website} Owner Identification through Multi-level Contrastive Representation Learning", journal = j-TKDD, volume = "19", number = "9", pages = "165:1--165:39", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3767155", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Dec 8 10:20:03 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Website owner identification aims to recognize the organization or individual who owns a given website that is served on the web. 
It is a crucial step for cyberspace surveying and mapping, playing a significant role in cyberspace administration and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "165", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Jian:2025:IDC, author = "Meng Jian and Ruoxi Li and Meishan Liu and Meijuan Yang and Shaona Wang and Lifang Wu", title = "Interest-Disentangled Contrastive Sample Generation for Recommendation", journal = j-TKDD, volume = "19", number = "9", pages = "166:1--166:20", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3768160", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Dec 8 10:20:03 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "In the domain of recommendations, previous works often retrieve items through sampling strategies from the database to gather negative signals for exploring implicit feedback. However, because of extremely sparse records, the existing items used as \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "166", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2025:RPM, author = "Rong Wang and Zerui Wu and Liangyu Wang and Chaolong Jia and Yunpeng Xiao", title = "A Rumor Propagation Model Based on User Cognition and Evolutionary Game", journal = j-TKDD, volume = "19", number = "9", pages = "167:1--167:33", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3767161", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Dec 8 10:20:03 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "In social networks, studying rumor propagation patterns is essential for curbing the spread of rumors. Given the coexistence and conflict of multiple-type rumor information, as well as users' cognitive differences, this article presents a rumor \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "167", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yang:2025:TRI, author = "Hanchen Yang and Jiannong Cao and Wengen Li and Yu Yang and Xiaoyi Li and Lingbai Kong and Yichao Zhang and Jihong Guan and Shuigeng Zhou", title = "Towards Robust and Interpretable Spatial-Temporal Graph Modeling for Traffic Prediction", journal = j-TKDD, volume = "19", number = "9", pages = "168:1--168:20", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3769297", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Dec 8 10:20:03 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Accurate spatial-temporal (ST) traffic prediction plays an essential role in intelligent transportation systems. 
Existing advanced traffic prediction methods typically utilize spatial-temporal graph neural networks (STGNNs) to capture the ST correlations \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "168", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Gong:2025:HIS, author = "Fang Gong and Tao Lu and Kuayue Liu", title = "Hidden Inverted Specific-Class Distance Measure for Nominal Attributes", journal = j-TKDD, volume = "19", number = "9", pages = "169:1--169:17", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3769293", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Dec 8 10:20:03 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "The inverted specific-class distance measure (ISCDM) is a popular distance metric that uses conditional probability term to calculate the distance between two nominal attribute values, but the reliability of the conditional probability term is limited by \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "169", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Yang:2025:SSU, author = "Jinjing Yang and Shaohua Xu and Zebin Yang and Aijun Zhang and Yongdao Zhou", title = "Stable Subsampling under Model Misspecification and Covariate Shift", journal = j-TKDD, volume = "19", number = "9", pages = "170:1--170:26", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3769077", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Dec 8 10:20:03 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "The presence of covariate shift between training and test datasets, coupled with model misspecification, can lead to instability in regression predictions across diverse datasets. Meanwhile, training complex models with massive data imposes significant \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "170", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Xie:2025:AFD, author = "Tianyang Xie and Yong Ge and Shuojia Guo", title = "Accuracy, Fairness, Diversity All at Once: an Influence-Function-Guided Data Enhancement Approach for Recommender System", journal = j-TKDD, volume = "19", number = "9", pages = "171:1--171:27", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3768316", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Dec 8 10:20:03 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Recommender systems play a pivotal role in curating high-quality content for users, predominantly leveraging data-driven algorithms and machine learning methodologies. 
However, the intrinsic data-centric nature of these systems raises critical concerns; \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "171", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zuin:2025:ELT, author = "Gianlucca Zuin and Adriano Veloso", title = "{``A 6 or a 9?''}: Ensemble Learning through the Multiplicity of Performant Models and Explanations", journal = j-TKDD, volume = "19", number = "9", pages = "172:1--172:39", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3767735", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Dec 8 10:20:03 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Creating models from past observations and ensuring their effectiveness on new data is the essence of machine learning. However, selecting models that generalize well remains a challenging task. Related to this topic, the Rashomon Effect refers to cases \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "172", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Moorthy:2025:TFD, author = "Adithya K. 
Moorthy and Jaya Teja Reddy Pochimireddy and Vijaya Saradhi Vedula and Bhanu Prasad", title = "Towards Fair Decision Boundaries in Clustering: Integrating Disparate Impact Criteria into Maximum Margin Clustering", journal = j-TKDD, volume = "19", number = "9", pages = "173:1--173:24", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3770078", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Dec 8 10:20:03 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Extensive application of machine learning in the areas that impact human lives has significantly spurred considerable interest in developing algorithms that are demonstrably fair. Recent efforts in this field have led to the creation of numerous \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "173", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chen:2025:IGC, author = "Guobang Chen and Wenjun Jiang and Kenli Li and Jingjing Wang and Jie Wu and Kian-Lee Tan", title = "Integrating Group Consensus for Competitive Influence Maximization in {OSNs}", journal = j-TKDD, volume = "19", number = "9", pages = "174:1--174:38", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3768583", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Dec 8 10:20:03 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "In online social networks (OSNs), people usually join groups for communication. Information diffusion often occurs with some cost, either between individuals or within/among groups; and different opinions may compete with each other. The groups can make \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "174", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2025:EDP, author = "Yuhan Wang and Qing Xie and Mengzi Tang and Lin Li and Jingling Yuan and Yongjian Liu", title = "Erratum: {A} Dual Perspective Framework of Knowledge-correlation for Cross-domain Recommendation", journal = j-TKDD, volume = "19", number = "9", pages = "C2:1--C2:2", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3767751", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Dec 8 10:20:03 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", note = "See \cite{Wang:2024:DPF}.", abstract = "This is an erratum for the article ``A Dual Perspective Framework of Knowledge-correlation for Cross-domain Recommendation'' published in ACM Trans. Knowl. Discov. Data 18(6): 152:1-152:28 (2024).", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "C2", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Su:2026:JOD, author = "Weikang Su and Haoqiang Liu and Tong Li and Xingzai Lv and Hua Rui and Wenzhen Huang and Zhaocheng Wang and Yong Li", title = "Jointly Optimizing Deployment and Antenna of Base Stations Using Hierarchical Reinforcement Learning", journal = j-TKDD, volume = "20", number = "1", pages = "1:1--1:25", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3763795", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Jan 15 06:36:40 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "1", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2026:MDC, author = "Feijiang Li and Xin Liu and Jieting Wang and Yuhua Qian", title = "{MCSS}: Discovering Consistently Determined Relation in Multi-View Clustering Based on Sample's Stability", journal = j-TKDD, volume = "20", number = "1", pages = "2:1--2:28", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3771274", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Jan 15 06:36:40 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "2", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Sun:2026:PSS, author = "Shengyin Sun and Chen Ma and Jiehao Chen", title = "{PHE}: Structure and Semantic Enhanced Pre-Training of Graph Neural Networks for Large-Scale Heterogeneous Graphs", journal = j-TKDD, volume = "20", number = "1", pages = "3:1--3:26", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3772278", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Jan 15 06:36:40 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "3", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liu:2026:MFL, author = "Qinghe Liu and Mingming Jiang and Pin Wang and Hongli Xu and Rilige Wu and Zhenfeng Zhu and Xinwang Liu and Yawei Zhao and Kunlun He", title = "Medical Federated Learning with Improved Representation and Personalized Aggregation", journal = j-TKDD, volume = "20", number = "1", pages = "4:1--4:31", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3737649", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Jan 15 06:36:40 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "4", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Shi:2026:SHS, author = "Hongyu Shi and Ling Chen and Qian Chen", title = "{SR-HyperFM}: Sample Relationship Aware Hypergraph Factorization Machines for Feature Interaction Modeling", journal = j-TKDD, volume = "20", number = "1", pages = "5:1--5:19", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3773082", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Jan 15 06:36:40 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "5", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhao:2026:SSO, author = "Yusheng Zhao and Xiao Luo and Junyu Luo and Wei Ju and Zhonghui Gu and Zhiping Xiao and Xian-Sheng Hua and Ming Zhang", title = "{SPOT}: Spectral Optimal Transport for Graph Domain Generalization", journal = j-TKDD, volume = "20", number = "1", pages = "6:1--6:22", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3772720", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Jan 15 06:36:40 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "6", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Di:2026:GDG, author = "Zijun Di and Peng Zheng and Bin Lu and Kai Guan and Luoyi Fu and Ningdi Jin and Ye Chen and Xiaoying Gan and Lei Zhou and Xinbing Wang and Chenghu Zhou", title = "Graph Out-of-Distribution Generalization Based on Structural-Entropy-Guided Information Bottleneck", journal = j-TKDD, volume = "20", number = "1", pages = "7:1--7:34", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3767162", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Jan 15 06:36:40 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "7", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chuang:2026:PPR, author = "Yu-Neng Chuang and Cheng-Te Li", title = "Privacy-Preserving Representation Learning with Gradient Obfuscation against Attribute Inference for Recommendation", journal = j-TKDD, volume = "20", number = "1", pages = "8:1--8:23", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3773987", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Jan 15 06:36:40 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "8", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Qin:2026:FSA, author = "Tianrui Qin and Xuan Wang and Xianghuan He and Yiren Zhao and Kejiang Ye and Cheng-Zhong Xu and Xitong Gao", title = "{Flareon}: Stealthy All2all Backdoor Injection via Poisoned Augmentation", journal = j-TKDD, volume = "20", number = "1", pages = "9:1--9:23", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3774648", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Jan 15 06:36:40 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "9", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ying:2026:TAR, author = "Wangyang Ying and Haoyue Bai and Kunpeng Liu and Yanjie Fu", title = "Topology-aware Reinforcement Feature Space Reconstruction for Graph Data", journal = j-TKDD, volume = "20", number = "1", pages = "10:1--10:22", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3774423", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Jan 15 06:36:40 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "10", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wu:2026:SSC, author = "Yue Wu and Mingyu Zhao and Songming Zhang and Ziyu Lyu and Dayong Peng and Wanji Zheng and Hua Chai", title = "A Semi-supervised Co-training Algorithm for Robust Recommendation", journal = j-TKDD, volume = "20", number = "1", pages = "11:1--11:25", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3773988", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Jan 15 06:36:40 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "11", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Wang:2026:DDS, author = "Yingxu Wang and Nan Yin and Mingyan Xiao and Xinhao Yi and Siwei Liu and Shangsong Liang", title = "{DuSEGO}: Dual Second-Order Equivariant Graph Ordinary Differential Equation", journal = j-TKDD, volume = "20", number = "1", pages = "12:1--12:18", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3774321", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Jan 15 06:36:40 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "12", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Hu:2026:GAP, author = "Chenxi Hu and Yifan Hu and Yunxiang Zhao and Tao Wu and Meng Zhang and Chunsheng Liu and Yangyi Hu", title = "{Gaussian}-Augmented Prototypical Network for Class-Incremental Few-Shot Relation Classification", journal = j-TKDD, volume = "20", number = "1", pages = "13:1--13:31", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3774420", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Jan 15 06:36:40 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "13", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Conte:2026:FAE, author = "Alessio Conte and Roberto Grossi and Grigorios Loukides and Nadia Pisanti and Solon P. 
Pissis and Giulia Punzi", title = "Fast Assessment of {Eulerian} Trails in Graphs with Applications", journal = j-TKDD, volume = "20", number = "1", pages = "14:1--14:33", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3771997", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Jan 15 06:36:40 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "14", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{He:2026:IDF, author = "Yi-Xiao He and Shen-Huan Lyu and Yuan Jiang", title = "Interpreting Deep Forest through Feature Contribution and {MDI} Feature Importance", journal = j-TKDD, volume = "20", number = "1", pages = "15:1--15:21", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3641108", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Jan 15 06:36:40 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "15", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Guo:2026:EDG, author = "Xiaojie Guo and Yuanqi Du and Zheng Zhang and Liang Zhao", title = "Efficient Deep Generative Models for Spatial Networks via Spanning Tree Sampler", journal = j-TKDD, volume = "20", number = "1", pages = "16:1--16:28", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3774416", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Jan 15 06:36:40 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "16", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2026:CCM, author = "Erchao Li and Hongxu Li and Yu Peng", title = "Competitive-Cooperative Multi-Task Optimization Algorithm with Historical Success Archive", journal = j-TKDD, volume = "20", number = "1", pages = "17:1--17:45", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3775058", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Jan 15 06:36:40 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "17", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ding:2026:TPM, author = "Xiaojian Ding and Xiaoying Zhu and Fumin Ma", title = "Tri-perspective Multi-view Classification", journal = j-TKDD, volume = "20", number = "1", pages = "18:1--18:21", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3777462", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Jan 15 06:36:40 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "18", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Ma:2026:IFB, author = "Boxuan Ma and Sora Fukui and Yuji Ando and Shin'ichi Konomi", title = "Integrating Forgetting Behavior and Linguistic Features in Language Learning Models", journal = j-TKDD, volume = "20", number = "2", pages = "19:1--19:26", month = feb, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3778163", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 5 11:36:46 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Language learning applications usually estimate the learner's language knowledge over time to provide personalized practice content for each learner at the optimal timing. However, accurately predicting language knowledge or linguistic skills is much \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "19", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{HaCohen-Kerner:2026:EDA, author = "Yaakov HaCohen-Kerner and Natan Manor and Michael Goldmeier and Eytan Bachar", title = "Early Detection of Anorexia in Blog Posts Written in {English}", journal = j-TKDD, volume = "20", number = "2", pages = "20:1--20:28", month = feb, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3779415", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 5 11:36:46 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "This study concentrates on identifying girls with anorexia nervosa through English social media text analysis. 
A dataset was created comprising 100 blog posts authored by females who have anorexia and another 100 posts written by females likely without \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "20", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2026:TCD, author = "Zhiwei Li and Cheng Wang", title = "Towards Contactless Data-Model Matching", journal = j-TKDD, volume = "20", number = "2", pages = "21:1--21:32", month = feb, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3774939", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 5 11:36:46 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Data-model matching, typically achieved through direct contact, is critical to digital markets. However, when data and models belong to different owners, the direct contact-based form faces some security threats, including data security, privacy \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "21", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Sun:2026:IHS, author = "Xigang Sun and Jiahui Jin and Haojia Zhu and Wenchao Bai and Xin Lin", title = "Integrating Heterogeneous Spatio-Temporal Interactions for Traffic Speed Prediction", journal = j-TKDD, volume = "20", number = "2", pages = "22:1--22:26", month = feb, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3783987", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 5 11:36:46 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Predicting traffic speed is a crucial task in intelligent transportation systems, as it helps analyze traffic congestion and improve road flow. 
The complex spatio-temporal interactions present in traffic data make accurate predictions challenging. In \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "22", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Meena:2026:EIM, author = "Sunil Kumar Meena and Shashank Sheshar Singh and Kuldeep Singh", title = "Exploring Influence Maximization: State-of-the-Art Methods, Taxonomies, and Trends", journal = j-TKDD, volume = "20", number = "2", pages = "23:1--23:50", month = feb, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3779058", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 5 11:36:46 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Influence maximization (IM) is a key problem in social network analysis, with numerous applications in areas such as viral marketing, advertising, public health, and more. Current surveys in IM primarily focus on traditional algorithms while overlooking \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "23", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Loru:2026:CBA, author = "Edoardo Loru and Niccol{\`o} {Di Marco} and Matteo Cinelli and Walter Quattrociocchi", title = "A Compression-Based Approach to Detecting Automated and Coordinated Behavior on Social Media", journal = j-TKDD, volume = "20", number = "2", pages = "24:1--24:25", month = feb, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3778356", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 5 11:36:46 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Social media platforms are frequently targeted by entities engaging in automated or coordinated behavior, aiming to manipulate public opinion or conduct information operations without revealing their synthetic or managed nature. Research on detecting \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "24", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Kuang:2026:CDD, author = "Taojie Kuang and Qianli Ma and Athanasios V. Vasilakos and Yu Wang and Qiang Cheng and Zhixiang Ren", title = "Concept-Driven Deep Learning for Enhanced Protein-Specific Molecular Generation", journal = j-TKDD, volume = "20", number = "2", pages = "25:1--25:17", month = feb, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3779063", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 5 11:36:46 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "In recent years, deep learning techniques have made significant strides in molecular generation for specific targets, driving advancements in drug discovery. 
However, existing molecular generation methods present significant limitations: those operating \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "25", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Gullo:2026:PCM, author = "Francesco Gullo and Domenico Mandaglio and Andrea Tagarelli", title = "Polarized Communities Meet Densest Subgraph: Efficient and Effective Polarization Detection in Signed Networks", journal = j-TKDD, volume = "20", number = "2", pages = "26:1--26:26", month = feb, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3779064", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 5 11:36:46 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Signed networks represent interactions among users (nodes), with edges labeled as positive for friendly relations and negative for antagonistic ones. The 2-Polarized-Communities (2pc) combinatorial optimization problem seeks two disjoint polarized \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov.
Data", articleno = "26", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Liao:2026:UED, author = "Jiyun Liao and Chenglong Dai and Guanghui Li", title = "Unsupervised {EEG} Decoding with Frequency-Trend-Based Information Granule Learning", journal = j-TKDD, volume = "20", number = "2", pages = "27:1--27:36", month = feb, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3777386", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 5 11:36:46 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Electroencephalogram (EEG)-driven innovations have gained prominence as pivotal research frontiers in various domains, including neurological disorder diagnostics, sensorimotor rehabilitation, and brain-computer interfaces (BCIs), and so on. Traditional \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "27", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Geng:2026:AGP, author = "Jing Geng and Shangxian Zhao and Wang Weizhe and Qi Li", title = "Adaptive Graph Partitioning for Clustering Datasets with Heterogeneous Density", journal = j-TKDD, volume = "20", number = "2", pages = "28:1--28:27", month = feb, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3785361", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 5 11:36:46 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "In recent years, graph-partition-based clustering algorithms have attracted increasing attention. These algorithms first construct a graph over the data points and then partition this graph, regarding each connected subgraph in the partitioned graph as a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "28", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Guan:2026:RNN, author = "Hui Guan and Umang Chaudhary and Yuanchao Xu and Lin Ning and Lijun Zhang and Xipeng Shen", title = "Recurrent Neural Networks Meet Context-Free Grammar: Two Birds with One Stone", journal = j-TKDD, volume = "20", number = "2", pages = "29:1--29:22", month = feb, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3785464", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 5 11:36:46 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "This work addresses a key challenge in the effective adoption of Recurrent Neural Networks (RNNs) by reducing inference time and expanding the scope of a prediction. It introduces compressed learning, a novel approach that integrates Context-Free Grammar \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "29", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2026:BNM, author = "Youhua Li and Ersheng Ni and Yihao Liu and Sibo Xu and Tianyi Xu and Mingxuan Wu and Junchen Fu and Yucheng Zhang and Yuanqi He and Xinyuan Song and Yongxin Ni", title = "Bridging {NIP} and {MLM}: a Unified Meta-Learning Framework for Sequential Recommendation", journal = j-TKDD, volume = "20", number = "2", pages = "30:1--30:19", month = feb, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3786346", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 5 11:36:46 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Sequential Recommender Systems (SRSs) predict items of interest for users based on their historical interactions. 
Two key popular paradigms for SRs are unidirectional Next Item Prediction (NIP) and bidirectional Masked Language Modeling (MLM). NIP \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "30", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Zhang:2026:SDG, author = "Kexin Zhang and Shuhan Liu and Song Wang and Weili Shi and Chen Chen and Pan Li and Sheng Li and Jundong Li and Kaize Ding", title = "A Survey of Deep Graph Learning under Distribution Shifts: From Graph Out-of-Distribution Generalization to Adaptation", journal = j-TKDD, volume = "20", number = "2", pages = "31:1--31:38", month = feb, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3785475", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 5 11:36:46 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Distribution shifts on graphs---the discrepancies in data distribution between training and employing a graph machine learning model---are ubiquitous and often unavoidable in real-world applications. These shifts may severely deteriorate model performance, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov.
Data", articleno = "31", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Chen:2026:HBF, author = "Jianrui Chen and Peijie Wang and Maoguo Gong and Xuehui Zhao", title = "Hyperbolic-based Feature Learning for Temporal Knowledge Graph Relation Prediction", journal = j-TKDD, volume = "20", number = "2", pages = "32:1--32:23", month = feb, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3786587", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 5 11:36:46 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "In the realm of real-world knowledge graphs, the dynamism of facts is a prevailing characteristic. To illustrate, a popular restaurant was awarded a Michelin star in 2004 and retained this prestigious recognition in 2008, but lost it in 2012 due to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "32", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Tian:2026:CIG, author = "Changyuan Tian and Li Jin and Zequn Zhang and Zhicong Lu and Wen Shi and Jianhua Yin and Shiyao Yan and Zhi Guo", title = "Consistency and Invariance Guided Multi-View Hypergraph Learning for Robust Hyperedge Prediction", journal = j-TKDD, volume = "20", number = "2", pages = "33:1--33:24", month = feb, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3787102", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 5 11:36:46 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Hypergraphs, by extending traditional graphs with hyperedges, enable the modeling and prediction of complex higher-order interactions that go beyond simple pairwise interactions. 
Hyperedge prediction, an evolution of link prediction, aims to identify \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "33", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Deng:2026:MVB, author = "Xinyan Deng and Jiajie Tan and Xiaorou Zheng and Shoubin Dong", title = "Multi-Vector Biomedical Dense Retrieval with Knowledge-Enhanced Entity-Type Clustering", journal = j-TKDD, volume = "20", number = "2", pages = "34:1--34:24", month = feb, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3785368", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 5 11:36:46 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Single-vector dense retrieval models, which are foundational to modern Retrieval-Augmented Generation (RAG) systems, struggle to represent the multifaceted semantics of complex documents, particularly in specialized fields like biomedicine. This semantic \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. 
Data", articleno = "34", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", } @Article{Li:2026:MPM, author = "Xinyang Li and Yanmin Zhu and Chunyang Wang and Jiadi Yu and Feilong Tang", title = "{MESA}: Plugin Meta-Modulation for Transformer-Based Cold-Start Sequential Recommendation", journal = j-TKDD, volume = "20", number = "2", pages = "35:1--35:19", month = feb, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3788282", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Thu Mar 5 11:36:46 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Sequential recommenders aim to enhance prediction accuracy by leveraging user interaction sequences, with transformer-based models showing particularly strong performance. Among them, cold-start sequential recommenders are particularly challenging \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Knowl. Discov. Data", articleno = "35", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "https://dl.acm.org/loi/tkdd", }